From c1f54478050279663217fe5ffa4bc90f60e7030d Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sat, 14 May 2011 16:10:30 +0200
Subject: [PATCH 001/830] configure: use same CPPFLAGS in kFreeBSD as Linux

046f081b46c8479820409cf8f530b988221bd15b reorganized the CPPFLAGS to no
longer add -D_POSIX_C_SOURCE unconditionally, but only on systems (e.g.,
glibc based ones) that require it.  As kFreeBSD uses glibc, it needs to
be treated similarly.

Additionally, _BSD_SOURCE is turned on to enable some additional types
such as caddr_t, which are normally enabled on BSD but not with glibc.
---
 configure | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configure b/configure
index d527ed1974..970ec5669b 100755
--- a/configure
+++ b/configure
@@ -2487,6 +2487,7 @@ case $target_os in
         enable dos_paths
         ;;
     gnu/kfreebsd)
+        add_cppflags -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600 -D_BSD_SOURCE
         ;;
     gnu)
         ;;

From 10931720cd55d83e0b933b8a9bb0795fd9e48875 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 13 May 2011 15:42:31 +0200
Subject: [PATCH 002/830] imgutils: generalize linesize computation for
 bitstream formats

Make it a subcase of the general algorithm used for the non-bitstream
case. Simplify, and make av_image_get_linesize() and
av_image_fill_linesizes() return the right value when plane
!= 0.

In particular fix a crash occurring with:
-vf format=monow,showinfo,format=monow.
---
 libavutil/imgutils.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
index 8eefa4d494..0df8de4255 100644
--- a/libavutil/imgutils.c
+++ b/libavutil/imgutils.c
@@ -48,14 +48,14 @@ int av_image_get_linesize(enum PixelFormat pix_fmt, int width, int plane)
     const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
     int max_step     [4];       /* max pixel step for each plane */
     int max_step_comp[4];       /* the component for each plane which has the max pixel step */
-    int s;
-
-    if (desc->flags & PIX_FMT_BITSTREAM)
-        return (width * (desc->comp[0].step_minus1+1) + 7) >> 3;
+    int s, linesize;
 
     av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
     s = (max_step_comp[plane] == 1 || max_step_comp[plane] == 2) ? desc->log2_chroma_w : 0;
-    return max_step[plane] * (((width + (1 << s) - 1)) >> s);
+    linesize = max_step[plane] * (((width + (1 << s) - 1)) >> s);
+    if (desc->flags & PIX_FMT_BITSTREAM)
+        linesize = (linesize + 7) >> 3;
+    return linesize;
 }
 
 int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width)
@@ -70,13 +70,6 @@ int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
     if ((unsigned)pix_fmt >= PIX_FMT_NB || desc->flags & PIX_FMT_HWACCEL)
         return AVERROR(EINVAL);
 
-    if (desc->flags & PIX_FMT_BITSTREAM) {
-        if (width > (INT_MAX -7) / (desc->comp[0].step_minus1+1))
-            return AVERROR(EINVAL);
-        linesizes[0] = (width * (desc->comp[0].step_minus1+1) + 7) >> 3;
-        return 0;
-    }
-
     av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
     for (i = 0; i < 4; i++) {
         int s = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
@@ -84,6 +77,8 @@ int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
         if (max_step[i] > INT_MAX / shifted_w)
             return AVERROR(EINVAL);
         linesizes[i] = max_step[i] * shifted_w;
+        if (desc->flags & PIX_FMT_BITSTREAM)
+            linesizes[i] = (linesizes[i] + 7) >> 3;
     }
 
     return 0;

From bb82ea797fb6538308af84310118d8006b150318 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 14 May 2011 13:20:52 +0200
Subject: [PATCH 003/830] showinfo: fix computation of Adler checksum

Previously the code was computing the checksum only for the first line
of each plane.
---
 libavfilter/vf_showinfo.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/libavfilter/vf_showinfo.c b/libavfilter/vf_showinfo.c
index d512199602..30e1108b13 100644
--- a/libavfilter/vf_showinfo.c
+++ b/libavfilter/vf_showinfo.c
@@ -43,13 +43,19 @@ static void end_frame(AVFilterLink *inlink)
     AVFilterContext *ctx = inlink->dst;
     ShowInfoContext *showinfo = ctx->priv;
     AVFilterBufferRef *picref = inlink->cur_buf;
-    uint32_t plane_crc[4], crc = 0;
-    int plane;
+    uint32_t plane_crc[4] = {0}, crc = 0;
+    int i, plane, vsub = av_pix_fmt_descriptors[inlink->format].log2_chroma_h;
 
-    for (plane = 0; plane < 4; plane++) {
+    for (plane = 0; picref->data[plane] && plane < 4; plane++) {
         size_t linesize = av_image_get_linesize(picref->format, picref->video->w, plane);
-        plane_crc[plane] = av_adler32_update(0  , picref->data[plane], linesize);
-        crc              = av_adler32_update(crc, picref->data[plane], linesize);
+        uint8_t *data = picref->data[plane];
+        int h = plane == 1 || plane == 2 ? inlink->h >> vsub : inlink->h;
+
+        for (i = 0; i < h; i++) {
+            plane_crc[plane] = av_adler32_update(plane_crc[plane], data, linesize);
+            crc = av_adler32_update(crc, data, linesize);
+            data += picref->linesize[plane];
+        }
     }
 
     av_log(ctx, AV_LOG_INFO,

From 5a2ea3cffb1ca6c99244b6a9b240cdbac27a0928 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 14 May 2011 15:09:54 +0200
Subject: [PATCH 004/830] showinfo: fix vertical align nit

---
 libavfilter/vf_showinfo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/vf_showinfo.c b/libavfilter/vf_showinfo.c
index 30e1108b13..6568b238a7 100644
--- a/libavfilter/vf_showinfo.c
+++ b/libavfilter/vf_showinfo.c
@@ -89,7 +89,7 @@ AVFilter avfilter_vf_showinfo = {
                                     .get_video_buffer = avfilter_null_get_video_buffer,
                                     .start_frame      = avfilter_null_start_frame,
                                     .end_frame        = end_frame,
-                                    .min_perms       = AV_PERM_READ, },
+                                    .min_perms        = AV_PERM_READ, },
                                   { .name = NULL}},
 
     .outputs   = (AVFilterPad[]) {{ .name             = "default",

From a05d02079e0fde1ff9b6abfda79ff20b38f68439 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 14 May 2011 15:19:26 +0200
Subject: [PATCH 005/830] showinfo: replace "CRC" by "checksum"

Indeed the Adler-32 checksum, which is computed by showinfo, is not
cyclic, so using the term "CRC" is wrong/confusing.
---
 doc/filters.texi          | 10 +++++-----
 libavfilter/vf_showinfo.c | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index a2557403b3..523e279d46 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -1360,12 +1360,12 @@ Check also the documentation of the @code{AVPictureType} enum and of
 the @code{av_get_picture_type_char} function defined in
 @file{libavutil/avutil.h}.
 
-@item crc
-Adler-32 CRC of the input frame
+@item checksum
+Adler-32 checksum of all the planes of the input frame
 
-@item plane_crc
-Adler-32 CRC of each plane of the input frame, expressed in the form
-"[@var{crc0} @var{crc1} @var{crc2} @var{crc3}]"
+@item plane_checksum
+Adler-32 checksum of each plane of the input frame, expressed in the form
+"[@var{c0} @var{c1} @var{c2} @var{c3}]"
 @end table
 
 @section slicify
diff --git a/libavfilter/vf_showinfo.c b/libavfilter/vf_showinfo.c
index 6568b238a7..82aa3b9901 100644
--- a/libavfilter/vf_showinfo.c
+++ b/libavfilter/vf_showinfo.c
@@ -43,7 +43,7 @@ static void end_frame(AVFilterLink *inlink)
     AVFilterContext *ctx = inlink->dst;
     ShowInfoContext *showinfo = ctx->priv;
     AVFilterBufferRef *picref = inlink->cur_buf;
-    uint32_t plane_crc[4] = {0}, crc = 0;
+    uint32_t plane_checksum[4] = {0}, checksum = 0;
     int i, plane, vsub = av_pix_fmt_descriptors[inlink->format].log2_chroma_h;
 
     for (plane = 0; picref->data[plane] && plane < 4; plane++) {
@@ -52,8 +52,8 @@ static void end_frame(AVFilterLink *inlink)
         int h = plane == 1 || plane == 2 ? inlink->h >> vsub : inlink->h;
 
         for (i = 0; i < h; i++) {
-            plane_crc[plane] = av_adler32_update(plane_crc[plane], data, linesize);
-            crc = av_adler32_update(crc, data, linesize);
+            plane_checksum[plane] = av_adler32_update(plane_checksum[plane], data, linesize);
+            checksum = av_adler32_update(checksum, data, linesize);
             data += picref->linesize[plane];
         }
     }
@@ -61,7 +61,7 @@ static void end_frame(AVFilterLink *inlink)
     av_log(ctx, AV_LOG_INFO,
            "n:%d pts:%"PRId64" pts_time:%f pos:%"PRId64" "
            "fmt:%s sar:%d/%d s:%dx%d i:%c iskey:%d type:%c "
-           "crc:%u plane_crc:[%u %u %u %u]\n",
+           "checksum:%u plane_checksum:[%u %u %u %u]\n",
            showinfo->frame,
            picref->pts, picref ->pts * av_q2d(inlink->time_base), picref->pos,
            av_pix_fmt_descriptors[picref->format].name,
@@ -71,7 +71,7 @@ static void end_frame(AVFilterLink *inlink)
            picref->video->top_field_first ? 'T' : 'B',    /* Top / Bottom */
            picref->video->key_frame,
            av_get_picture_type_char(picref->video->pict_type),
-           crc, plane_crc[0], plane_crc[1], plane_crc[2], plane_crc[3]);
+           checksum, plane_checksum[0], plane_checksum[1], plane_checksum[2], plane_checksum[3]);
 
     showinfo->frame++;
     avfilter_end_frame(inlink->dst->outputs[0]);

From 27614b121776aa2b32579808810fb95839627bd9 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 14 May 2011 19:47:55 +0200
Subject: [PATCH 006/830] rawdec: propagate pict_type information to the output
 frame

---
 libavcodec/rawdec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index 385bdf4a6a..bdf5674873 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -121,6 +121,7 @@ static int raw_decode(AVCodecContext *avctx,
     AVFrame * frame = (AVFrame *) data;
     AVPicture * picture = (AVPicture *) data;
 
+    frame->pict_type        = avctx->coded_frame->pict_type;
     frame->interlaced_frame = avctx->coded_frame->interlaced_frame;
     frame->top_field_first = avctx->coded_frame->top_field_first;
     frame->reordered_opaque = avctx->reordered_opaque;

From c73b779d3576754f883ad5d5968cfae131752f40 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 15 May 2011 11:46:18 +0200
Subject: [PATCH 007/830] configure: Include AVX availability in summary
 output.

---
 configure | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configure b/configure
index 970ec5669b..d6a5d69c6c 100755
--- a/configure
+++ b/configure
@@ -3100,6 +3100,7 @@ if enabled x86; then
     echo "3DNow! extended enabled   ${amd3dnowext-no}"
     echo "SSE enabled               ${sse-no}"
     echo "SSSE3 enabled             ${ssse3-no}"
+    echo "AVX enabled               ${avx-no}"
     echo "CMOV enabled              ${cmov-no}"
     echo "CMOV is fast              ${fast_cmov-no}"
     echo "EBX available             ${ebx_available-no}"

From b1bb3b8d875900b6ea6a560d9336e4d4b3f8fb37 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 15 May 2011 13:12:47 +0200
Subject: [PATCH 008/830] dfa: Remove unused variable.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the warning:
libavcodec/dfa.c:189: warning: unused variable ‘frame_end’
---
 libavcodec/dfa.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/dfa.c b/libavcodec/dfa.c
index 1556bc7acb..919375baf0 100644
--- a/libavcodec/dfa.c
+++ b/libavcodec/dfa.c
@@ -186,7 +186,6 @@ static int decode_dds1(uint8_t *frame, int width, int height,
 static int decode_bdlt(uint8_t *frame, int width, int height,
                        const uint8_t *src, const uint8_t *src_end)
 {
-    const uint8_t *frame_end = frame + width * height;
     uint8_t *line_ptr;
     int count, lines, segments;
 

From 7f995abed362be3ae54c6f0464cf00b2c89b7678 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Fri, 29 Apr 2011 19:42:00 +0200
Subject: [PATCH 009/830] acelp: Remove unused gray_decode table.

---
 libavcodec/acelp_vectors.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c
index 8e59e4beab..25a6ff27df 100644
--- a/libavcodec/acelp_vectors.c
+++ b/libavcodec/acelp_vectors.c
@@ -93,16 +93,6 @@ const uint8_t ff_fc_4pulses_8bits_track_4[32] =
     78, 79,
 };
 
-#if 0
-static uint8_t gray_decode[32] =
-{
-    0,  1,  3,  2,  7,  6,  4,  5,
-   15, 14, 12, 13,  8,  9, 11, 10,
-   31, 30, 28, 29, 24, 25, 27, 26,
-   16, 17, 19, 18, 23, 22, 20, 21
-};
-#endif
-
 const float ff_pow_0_7[10] = {
     0.700000, 0.490000, 0.343000, 0.240100, 0.168070,
     0.117649, 0.082354, 0.057648, 0.040354, 0.028248

From a80f74c584296971c6e6b7e0c10d9b5ec0440857 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 15 May 2011 22:37:02 +0200
Subject: [PATCH 010/830] av_picture_crop(): Support simple cases with packed
 pixels too.

This fixes a regression when linked to old ffmpeg.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/imgconvert.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 59630dfb09..a86d2bd027 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -790,15 +790,23 @@ int av_picture_crop(AVPicture *dst, const AVPicture *src,
     int y_shift;
     int x_shift;
 
-    if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB || !is_yuv_planar(&pix_fmt_info[pix_fmt]))
+    if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB)
         return -1;
 
     y_shift = av_pix_fmt_descriptors[pix_fmt].log2_chroma_h;
     x_shift = av_pix_fmt_descriptors[pix_fmt].log2_chroma_w;
 
+    if (is_yuv_planar(&pix_fmt_info[pix_fmt])) {
     dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band;
     dst->data[1] = src->data[1] + ((top_band >> y_shift) * src->linesize[1]) + (left_band >> x_shift);
     dst->data[2] = src->data[2] + ((top_band >> y_shift) * src->linesize[2]) + (left_band >> x_shift);
+    } else{
+        if(top_band % (1<<y_shift) || left_band % (1<<x_shift))
+            return -1;
+        if(left_band) //FIXME add support for this too
+            return -1;
+        dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band;
+    }
 
     dst->linesize[0] = src->linesize[0];
     dst->linesize[1] = src->linesize[1];

From 033a4a942a81a1880ca5a89e7eb3a2b5f529a7fb Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Sun, 15 May 2011 11:07:29 -0400
Subject: [PATCH 011/830] aacdec: Use float instead of int16_t for ltp_state to
 avoid needless rounding.

---
 libavcodec/aac.h    | 2 +-
 libavcodec/aacdec.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index ecb8191566..76b6a7821b 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -227,7 +227,7 @@ typedef struct {
     DECLARE_ALIGNED(32, float,   coeffs)[1024];     ///< coefficients for IMDCT
     DECLARE_ALIGNED(32, float,   saved)[1024];      ///< overlap
     DECLARE_ALIGNED(32, float,   ret)[2048];        ///< PCM output
-    DECLARE_ALIGNED(16, int16_t, ltp_state)[3072];  ///< time signal for LTP
+    DECLARE_ALIGNED(16, float,   ltp_state)[3072];  ///< time signal for LTP
     PredictorState predictor_state[MAX_PREDICTORS];
 } SingleChannelElement;
 
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index d26cce994c..5f9dd834a0 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -1820,9 +1820,9 @@ static void update_ltp(AACContext *ac, SingleChannelElement *sce)
             saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
     }
 
-    memcpy(sce->ltp_state, &sce->ltp_state[1024], 1024 * sizeof(int16_t));
-    ac->fmt_conv.float_to_int16(&(sce->ltp_state[1024]), sce->ret,  1024);
-    ac->fmt_conv.float_to_int16(&(sce->ltp_state[2048]), saved_ltp, 1024);
+    memcpy(sce->ltp_state,      sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
+    memcpy(sce->ltp_state+1024, sce->ret,            1024 * sizeof(*sce->ltp_state));
+    memcpy(sce->ltp_state+2048, saved_ltp,           1024 * sizeof(*sce->ltp_state));
 }
 
 /**

From 350f3d145be685768292f751ec4bec8439bf1113 Mon Sep 17 00:00:00 2001
From: Hanspeter Niederstrasser <niederstrasser@gmail.com>
Date: Mon, 16 May 2011 02:57:15 +0200
Subject: [PATCH 012/830] configure: another try on fixing osx/mingw SDL

---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index a7aad19011..fd7d5759f9 100755
--- a/configure
+++ b/configure
@@ -2943,7 +2943,7 @@ SDL_CONFIG="${cross_prefix}sdl-config"
 if "${SDL_CONFIG}" --version > /dev/null 2>&1; then
     sdl_cflags=$("${SDL_CONFIG}" --cflags)
     sdl_libs=$("${SDL_CONFIG}" --libs)
-    check_func_headers SDL.h SDL_Init $sdl_cflags $sdl_libs &&
+    check_func_headers SDL_version.h SDL_Linked_Version $sdl_cflags $sdl_libs &&
     check_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) >= 0x010201" $sdl_cflags &&
     enable sdl &&
     check_struct SDL.h SDL_VideoInfo current_w $sdl_cflags && enable sdl_video_size

From d43a6edf7d62fb24f96697a7ad31cf585db4724c Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Mon, 16 May 2011 11:24:23 +0200
Subject: [PATCH 013/830] Bump minor after adding a caf muxer.

---
 libavformat/version.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/version.h b/libavformat/version.h
index 80c579f7ab..dde8aa9dec 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -24,8 +24,8 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVFORMAT_VERSION_MAJOR 53
-#define LIBAVFORMAT_VERSION_MINOR  0
-#define LIBAVFORMAT_VERSION_MICRO  3
+#define LIBAVFORMAT_VERSION_MINOR  1
+#define LIBAVFORMAT_VERSION_MICRO  0
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
                                                LIBAVFORMAT_VERSION_MINOR, \

From 0eba7fc2935fb5d1aba949d89be4049669d1e621 Mon Sep 17 00:00:00 2001
From: ami_stuff <ami_stuff@o2.pl>
Date: Mon, 16 May 2011 11:25:57 +0200
Subject: [PATCH 014/830] Support decoding of 2bpp rawvideo in avi (ticket
 206).

---
 libavcodec/rawdec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index bdf5674873..cdafa00b15 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -39,6 +39,7 @@ typedef struct RawVideoContext {
 } RawVideoContext;
 
 static const PixelFormatTag pix_fmt_bps_avi[] = {
+    { PIX_FMT_PAL8,    2 },
     { PIX_FMT_PAL8,    4 },
     { PIX_FMT_PAL8,    8 },
     { PIX_FMT_RGB444, 12 },

From 85eedcf6ff46b5d3e593a16ebcdd84af278e6d83 Mon Sep 17 00:00:00 2001
From: ami_stuff <ami_stuff@o2.pl>
Date: Mon, 16 May 2011 11:26:45 +0200
Subject: [PATCH 015/830] Support decoding of 1bpp rawvideo in avi (ticket
 205).

---
 libavcodec/rawdec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index cdafa00b15..3055a50475 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -39,6 +39,7 @@ typedef struct RawVideoContext {
 } RawVideoContext;
 
 static const PixelFormatTag pix_fmt_bps_avi[] = {
+    { PIX_FMT_MONOWHITE, 1 },
     { PIX_FMT_PAL8,    2 },
     { PIX_FMT_PAL8,    4 },
     { PIX_FMT_PAL8,    8 },

From d39bf3df729bed5f5df5d973ecc7110434416fe3 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 16 May 2011 03:34:20 +0200
Subject: [PATCH 016/830] Remove unused header mpegaudio3.h.

The header is a part of an MP3 encoder that never saw the light of day.
---
 libavcodec/Makefile     |  1 -
 libavcodec/mpegaudio3.h | 53 -----------------------------------------
 2 files changed, 54 deletions(-)
 delete mode 100644 libavcodec/mpegaudio3.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 9040b32f57..aaf9ceb4f9 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -656,7 +656,6 @@ SKIPHEADERS-$(CONFIG_LIBSCHROEDINGER)  += libschroedinger.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_internal.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h
 SKIPHEADERS-$(CONFIG_XVMC)             += xvmc.h
-SKIPHEADERS                            += mpegaudio3.h
 
 EXAMPLES = api
 
diff --git a/libavcodec/mpegaudio3.h b/libavcodec/mpegaudio3.h
deleted file mode 100644
index 7047652f6e..0000000000
--- a/libavcodec/mpegaudio3.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2007 Michael Niedermayer
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/* layer 3 "granule" */
-typedef struct GranuleDef {
-    uint8_t scfsi;
-    int part2_3_length;
-    int big_values;
-    int global_gain;
-    int scalefac_compress;
-    uint8_t block_type;
-    uint8_t switch_point;
-    int table_select[3];
-    int subblock_gain[3];
-    uint8_t scalefac_scale;
-    uint8_t count1table_select;
-    int region_size[3]; /* number of huffman codes in each region */
-    int preflag;
-    int short_start, long_end; /* long/short band indexes */
-    uint8_t scale_factors[40];
-    int32_t sb_hybrid[SBLIMIT * 18]; /* 576 samples */
-} GranuleDef;
-
-void ff_mp3_init(void);
-
-/**
- * Compute huffman coded region sizes.
- */
-void ff_init_short_region(MPADecodeContext *s, GranuleDef *g);
-
-/**
- * Compute huffman coded region sizes.
- */
-void ff_init_long_region(MPADecodeContext *s, GranuleDef *g, int ra1, int ra2);
-
-void ff_compute_band_indexes(MPADecodeContext *s, GranuleDef *g);

From 257de5fb25454209ccb3fd152d1ff3c98813e2ce Mon Sep 17 00:00:00 2001
From: Gil Pedersen <gil@cmi.aau.dk>
Date: Mon, 16 May 2011 14:40:56 +0200
Subject: [PATCH 017/830] h264dsp_mmx: Add #ifdefs around some mmxext functions
 on x86_64.

This fixes linking errors due to undefined symbols on x86_64 OS X.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/x86/h264dsp_mmx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 01b11163c8..1c07d14cd0 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -252,6 +252,7 @@ LF_IFUNC(v,  chroma_intra, depth,  avx)
 LF_FUNCS( uint8_t,  8)
 LF_FUNCS(uint16_t, 10)
 
+#if ARCH_X86_32
 LF_FUNC (v8, luma,             8, mmxext)
 static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
@@ -266,6 +267,7 @@ static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha
     ff_deblock_v8_luma_intra_8_mmxext(pix+0, stride, alpha, beta);
     ff_deblock_v8_luma_intra_8_mmxext(pix+8, stride, alpha, beta);
 }
+#endif /* ARCH_X86_32 */
 
 LF_FUNC (v,  luma,            10, mmxext)
 LF_IFUNC(v,  luma_intra,      10, mmxext)

From 68bed67d2eaabcfa7eaf00442312055f7d953b69 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 16 May 2011 01:51:04 +0200
Subject: [PATCH 018/830] pngdec: relax condition for setting monoblack pixel
 format

Ignore color_type information for setting the monoblack pixel format,
only rely on bit_depth.

In particular: fix Test_1bpp_grayscale.png from issue #172.

This may work fine for some broken encoders, and not for others.
---
 libavcodec/pngdec.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index 100b60cd1e..9732396dc6 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -467,8 +467,7 @@ static int decode_frame(AVCodecContext *avctx,
                 } else if (s->bit_depth == 16 &&
                            s->color_type == PNG_COLOR_TYPE_RGB) {
                     avctx->pix_fmt = PIX_FMT_RGB48BE;
-                } else if (s->bit_depth == 1 &&
-                           s->color_type == PNG_COLOR_TYPE_GRAY) {
+                } else if (s->bit_depth == 1) {
                     avctx->pix_fmt = PIX_FMT_MONOBLACK;
                 } else if (s->color_type == PNG_COLOR_TYPE_PALETTE) {
                     avctx->pix_fmt = PIX_FMT_PAL8;
@@ -504,7 +503,7 @@ static int decode_frame(AVCodecContext *avctx,
                 s->image_buf = p->data[0];
                 s->image_linesize = p->linesize[0];
                 /* copy the palette if needed */
-                if (s->color_type == PNG_COLOR_TYPE_PALETTE)
+                if (avctx->pix_fmt == PIX_FMT_PAL8)
                     memcpy(p->data[1], s->palette, 256 * sizeof(uint32_t));
                 /* empty row is used if differencing to the first row */
                 s->last_row = av_mallocz(s->row_size);

From 721d6f2dc5437df21ae17923b29fa2be847764c7 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 15:57:04 +0100
Subject: [PATCH 019/830] dct: bypass table allocation for DCT_II of size 32

The size-32 DCT_II has a special implementation which doesn't use
the normal tables.  Skipping allocation of these in this case saves
some memory.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/dct.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavcodec/dct.c b/libavcodec/dct.c
index 83ea00f9cb..e7a8f227b7 100644
--- a/libavcodec/dct.c
+++ b/libavcodec/dct.c
@@ -180,9 +180,14 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
     int n = 1 << nbits;
     int i;
 
+    memset(s, 0, sizeof(*s));
+
     s->nbits    = nbits;
     s->inverse  = inverse;
 
+    if (inverse == DCT_II && nbits == 5) {
+        s->dct_calc = dct32_func;
+    } else {
     ff_init_ff_cos_tabs(nbits+2);
 
     s->costab = ff_cos_tabs[nbits+2];
@@ -203,9 +208,7 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
     case DCT_III: s->dct_calc = ff_dct_calc_III_c; break;
     case DST_I  : s->dct_calc = ff_dst_calc_I_c; break;
     }
-
-    if (inverse == DCT_II && nbits == 5)
-        s->dct_calc = dct32_func;
+    }
 
     s->dct32 = dct32;
     if (HAVE_MMX)     ff_dct_init_mmx(s);

From 9503fbb859d859fada35c966af8d4765a8b819fa Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 15:57:36 +0100
Subject: [PATCH 020/830] dct: fix indentation

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/dct.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/libavcodec/dct.c b/libavcodec/dct.c
index e7a8f227b7..caa6bdb4b4 100644
--- a/libavcodec/dct.c
+++ b/libavcodec/dct.c
@@ -188,26 +188,26 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
     if (inverse == DCT_II && nbits == 5) {
         s->dct_calc = dct32_func;
     } else {
-    ff_init_ff_cos_tabs(nbits+2);
+        ff_init_ff_cos_tabs(nbits+2);
 
-    s->costab = ff_cos_tabs[nbits+2];
+        s->costab = ff_cos_tabs[nbits+2];
 
-    s->csc2 = av_malloc(n/2 * sizeof(FFTSample));
+        s->csc2 = av_malloc(n/2 * sizeof(FFTSample));
 
-    if (ff_rdft_init(&s->rdft, nbits, inverse == DCT_III) < 0) {
-        av_free(s->csc2);
-        return -1;
-    }
+        if (ff_rdft_init(&s->rdft, nbits, inverse == DCT_III) < 0) {
+            av_free(s->csc2);
+            return -1;
+        }
 
-    for (i = 0; i < n/2; i++)
-        s->csc2[i] = 0.5 / sin((M_PI / (2*n) * (2*i + 1)));
+        for (i = 0; i < n/2; i++)
+            s->csc2[i] = 0.5 / sin((M_PI / (2*n) * (2*i + 1)));
 
-    switch(inverse) {
-    case DCT_I  : s->dct_calc = ff_dct_calc_I_c; break;
-    case DCT_II : s->dct_calc = ff_dct_calc_II_c ; break;
-    case DCT_III: s->dct_calc = ff_dct_calc_III_c; break;
-    case DST_I  : s->dct_calc = ff_dst_calc_I_c; break;
-    }
+        switch(inverse) {
+        case DCT_I  : s->dct_calc = ff_dct_calc_I_c; break;
+        case DCT_II : s->dct_calc = ff_dct_calc_II_c ; break;
+        case DCT_III: s->dct_calc = ff_dct_calc_III_c; break;
+        case DST_I  : s->dct_calc = ff_dst_calc_I_c; break;
+        }
     }
 
     s->dct32 = dct32;

From 5026f946fda58b209334a40319af7c42ceb985fe Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 15:23:22 +0100
Subject: [PATCH 021/830] Add missing #includes to mp3_header_(de)compress bsf

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mp3_header_compress_bsf.c   | 1 +
 libavcodec/mp3_header_decompress_bsf.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libavcodec/mp3_header_compress_bsf.c b/libavcodec/mp3_header_compress_bsf.c
index 5a693774f9..c880e5e53d 100644
--- a/libavcodec/mp3_header_compress_bsf.c
+++ b/libavcodec/mp3_header_compress_bsf.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "mpegaudio.h"
 
diff --git a/libavcodec/mp3_header_decompress_bsf.c b/libavcodec/mp3_header_decompress_bsf.c
index 7dda795db5..b4b4167620 100644
--- a/libavcodec/mp3_header_decompress_bsf.c
+++ b/libavcodec/mp3_header_decompress_bsf.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "mpegaudio.h"
 #include "mpegaudiodata.h"

From 92ea249d7db4baf64680f412c6bd99ff85860723 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 17:13:23 +0100
Subject: [PATCH 022/830] mpegaudio: remove OUT_MIN/MAX macros

These macros are no longer needed after the s32 output was removed.
Change the relevant code to use av_clip_int16() instead of using
explicit limits.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudio.h    | 2 --
 libavcodec/mpegaudiodec.c | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index f12b897e23..47d10e91fa 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -70,8 +70,6 @@ typedef float OUT_INT;
 #define OUT_FMT AV_SAMPLE_FMT_FLT
 #else
 typedef int16_t OUT_INT;
-#define OUT_MAX INT16_MAX
-#define OUT_MIN INT16_MIN
 #define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
 #define OUT_FMT AV_SAMPLE_FMT_S16
 #endif
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 7fd6bd2dc6..10a63c57be 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -490,7 +490,7 @@ static inline int round_sample(int64_t *sum)
     int sum1;
     sum1 = (int)((*sum) >> OUT_SHIFT);
     *sum &= (1<<OUT_SHIFT)-1;
-    return av_clip(sum1, OUT_MIN, OUT_MAX);
+    return av_clip_int16(sum1);
 }
 
 #   define MULS(ra, rb) MUL64(ra, rb)

From bdefbf3e8857d2861d8d57c0ef583fe15a46d1a4 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 17:06:30 +0100
Subject: [PATCH 023/830] mpegaudio: move OUT_FMT macro to mpegaudiodec.c

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudio.h    | 2 --
 libavcodec/mpegaudiodec.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 47d10e91fa..2c3f2ec065 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -67,11 +67,9 @@
 
 #if CONFIG_FLOAT
 typedef float OUT_INT;
-#define OUT_FMT AV_SAMPLE_FMT_FLT
 #else
 typedef int16_t OUT_INT;
 #define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
-#define OUT_FMT AV_SAMPLE_FMT_S16
 #endif
 
 #if CONFIG_FLOAT
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 10a63c57be..8c42e09666 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -47,6 +47,7 @@
 #   define MULH3(x, y, s) ((s)*(y)*(x))
 #   define MULLx(x, y, s) ((y)*(x))
 #   define RENAME(a) a ## _float
+#   define OUT_FMT AV_SAMPLE_FMT_FLT
 #else
 #   define SHR(a,b)       ((a)>>(b))
 #   define compute_antialias compute_antialias_integer
@@ -57,6 +58,7 @@
 #   define MULH3(x, y, s) MULH((s)*(x), y)
 #   define MULLx(x, y, s) MULL(x,y,s)
 #   define RENAME(a)      a
+#   define OUT_FMT AV_SAMPLE_FMT_S16
 #endif
 
 /****************/

From d39facc783c270227e5b7c75db3dec406ed19018 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 15 May 2011 18:34:11 +0200
Subject: [PATCH 024/830] tools: Check the return value of write().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes several warnings of the type:
warning: ignoring return value of ‘write’, declared with attribute warn_unused_result
---
 tools/cws2fws.c   | 15 ++++++++++++---
 tools/pktdumper.c |  6 +++++-
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/tools/cws2fws.c b/tools/cws2fws.c
index aa7d690be3..5fa51470df 100644
--- a/tools/cws2fws.c
+++ b/tools/cws2fws.c
@@ -69,7 +69,10 @@ int main(int argc, char *argv[])
 
     // write out modified header
     buf_in[0] = 'F';
-    write(fd_out, &buf_in, 8);
+    if (write(fd_out, &buf_in, 8) < 8) {
+        perror("Error writing output file");
+        exit(1);
+    }
 
     zstream.zalloc = NULL;
     zstream.zfree = NULL;
@@ -101,7 +104,10 @@ int main(int argc, char *argv[])
             zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out,
             zstream.total_out-last_out);
 
-        write(fd_out, &buf_out, zstream.total_out-last_out);
+        if (write(fd_out, &buf_out, zstream.total_out - last_out) < zstream.total_out - last_out) {
+            perror("Error writing output file");
+            exit(1);
+        }
 
         i += len;
 
@@ -120,7 +126,10 @@ int main(int argc, char *argv[])
         buf_in[3] = ((zstream.total_out+8) >> 24) & 0xff;
 
         lseek(fd_out, 4, SEEK_SET);
-        write(fd_out, &buf_in, 4);
+        if (write(fd_out, &buf_in, 4) < 4) {
+            perror("Error writing output file");
+            exit(1);
+        }
     }
 
     inflateEnd(&zstream);
diff --git a/tools/pktdumper.c b/tools/pktdumper.c
index 3ab39ee675..80816d24b9 100644
--- a/tools/pktdumper.c
+++ b/tools/pktdumper.c
@@ -104,7 +104,11 @@ int main(int argc, char **argv)
         //printf("open(\"%s\")\n", pktfilename);
         if (!nowrite) {
             fd = open(pktfilename, O_WRONLY|O_CREAT, 0644);
-            write(fd, pkt.data, pkt.size);
+            err = write(fd, pkt.data, pkt.size);
+            if (err < 0) {
+                fprintf(stderr, "write: error %d\n", err);
+                return 1;
+            }
             close(fd);
         }
         av_free_packet(&pkt);

From c540061f3f552daa3724289b59b0a7a3692ad740 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 15 May 2011 22:17:35 +0200
Subject: [PATCH 025/830] cws2fws: Improve error message wording.

---
 tools/cws2fws.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/cws2fws.c b/tools/cws2fws.c
index 5fa51470df..b8535feaa4 100644
--- a/tools/cws2fws.c
+++ b/tools/cws2fws.c
@@ -35,14 +35,14 @@ int main(int argc, char *argv[])
     fd_in = open(argv[1], O_RDONLY);
     if (fd_in < 0)
     {
-        perror("Error while opening: ");
+        perror("Error opening input file");
         exit(1);
     }
 
     fd_out = open(argv[2], O_WRONLY|O_CREAT, 00644);
     if (fd_out < 0)
     {
-        perror("Error while opening: ");
+        perror("Error opening output file");
         close(fd_in);
         exit(1);
     }

From 5319f48a5753772e5c04c022a0ed903d8ceecbd1 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 16 May 2011 21:00:06 +0200
Subject: [PATCH 026/830] cmdutils: Allocate private decoder context if it's not
 allocated yet.

This fixes and simplifies setting decoder private options.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 cmdutils.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/cmdutils.c b/cmdutils.c
index 11391af361..a5363b8176 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -411,13 +411,24 @@ int opt_timelimit(const char *opt, const char *arg)
     return 0;
 }
 
+static void *alloc_priv_context(int size, AVClass *class){
+    void *p = av_mallocz(size);
+    if (p) {
+        *(AVClass**)p = class;
+        av_opt_set_defaults(p);
+    }
+    return p;
+}
+
 void set_context_opts(void *ctx, void *opts_ctx, int flags, AVCodec *codec)
 {
     int i;
     void *priv_ctx=NULL;
     if(!strcmp("AVCodecContext", (*(AVClass**)ctx)->class_name)){
         AVCodecContext *avctx= ctx;
-        if(codec && codec->priv_class && avctx->priv_data){
+        if(codec && codec->priv_class){
+            if(!avctx->priv_data && codec->priv_data_size)
+                avctx->priv_data= alloc_priv_context(codec->priv_data_size, codec->priv_class);
             priv_ctx= avctx->priv_data;
         }
     } else if (!strcmp("AVFormatContext", (*(AVClass**)ctx)->class_name)) {

From 198783744e181f047925bf829c69a2a35b85f6ab Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 16 May 2011 21:20:35 +0200
Subject: [PATCH 027/830] ffmpeg: initialize input_codec array earlier.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffmpeg.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ffmpeg.c b/ffmpeg.c
index 629d202257..d0a5d04660 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -3378,6 +3378,8 @@ static void opt_input_file(const char *filename)
         switch (dec->codec_type) {
         case AVMEDIA_TYPE_AUDIO:
             input_codecs[nb_input_codecs-1] = avcodec_find_decoder_by_name(audio_codec_name);
+            if(!input_codecs[nb_input_codecs-1])
+                input_codecs[nb_input_codecs-1] = avcodec_find_decoder(dec->codec_id);
             set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_AUDIO], AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM, input_codecs[nb_input_codecs-1]);
             channel_layout    = dec->channel_layout;
             audio_channels    = dec->channels;
@@ -3393,6 +3395,8 @@ static void opt_input_file(const char *filename)
             break;
         case AVMEDIA_TYPE_VIDEO:
             input_codecs[nb_input_codecs-1] = avcodec_find_decoder_by_name(video_codec_name);
+            if(!input_codecs[nb_input_codecs-1])
+                input_codecs[nb_input_codecs-1] = avcodec_find_decoder(dec->codec_id);
             set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_VIDEO], AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM, input_codecs[nb_input_codecs-1]);
             frame_height = dec->height;
             frame_width  = dec->width;
@@ -3430,6 +3434,8 @@ static void opt_input_file(const char *filename)
             break;
         case AVMEDIA_TYPE_SUBTITLE:
             input_codecs[nb_input_codecs-1] = avcodec_find_decoder_by_name(subtitle_codec_name);
+            if(!input_codecs[nb_input_codecs-1])
+                input_codecs[nb_input_codecs-1] = avcodec_find_decoder(dec->codec_id);
             if(subtitle_disable)
                 st->discard = AVDISCARD_ALL;
             break;

From 9763420bcc4a50a4c6e9ce2ee46f10de0bc7760c Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 16 May 2011 21:52:35 +0200
Subject: [PATCH 028/830] rawdec: Allow overriding top field first.

I am not sure this is the best way to implement it, but it's the simplest
and keeps the code separate from the application, keeping ffmpeg.c
simple and not requiring user apps to duplicate this code.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffmpeg.c            |  1 +
 libavcodec/rawdec.c | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/ffmpeg.c b/ffmpeg.c
index d0a5d04660..5373b7593f 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -2979,6 +2979,7 @@ static int opt_qscale(const char *opt, const char *arg)
 static int opt_top_field_first(const char *opt, const char *arg)
 {
     top_field_first = parse_number_or_die(opt, arg, OPT_INT, 0, 1);
+    opt_default(opt, arg);
     return 0;
 }
 
diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index 3055a50475..6bf749f4c3 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -29,15 +29,24 @@
 #include "raw.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
 
 typedef struct RawVideoContext {
+    AVClass *av_class;
     uint32_t palette[AVPALETTE_COUNT];
     unsigned char * buffer;  /* block of memory for holding one frame */
     int             length;  /* number of bytes in buffer */
     int flip;
     AVFrame pic;             ///< AVCodecContext.coded_frame
+    int tff;
 } RawVideoContext;
 
+static const AVOption options[]={
+{"top", "top field first", offsetof(RawVideoContext, tff), FF_OPT_TYPE_INT, {.dbl = -1}, -1, 1, AV_OPT_FLAG_DECODING_PARAM|AV_OPT_FLAG_VIDEO_PARAM},
+{NULL}
+};
+static const AVClass class = { "rawdec", NULL, options, LIBAVUTIL_VERSION_INT };
+
 static const PixelFormatTag pix_fmt_bps_avi[] = {
     { PIX_FMT_MONOWHITE, 1 },
     { PIX_FMT_PAL8,    2 },
@@ -130,6 +139,11 @@ static int raw_decode(AVCodecContext *avctx,
     frame->pkt_pts          = avctx->pkt->pts;
     frame->pkt_pos          = avctx->pkt->pos;
 
+    if(context->tff>=0){
+        frame->interlaced_frame = 1;
+        frame->top_field_first  = context->tff;
+    }
+
     //2bpp and 4bpp raw in avi and mov (yes this is ugly ...)
     if (context->buffer) {
         int i;
@@ -214,4 +228,5 @@ AVCodec ff_rawvideo_decoder = {
     raw_close_decoder,
     raw_decode,
     .long_name = NULL_IF_CONFIG_SMALL("raw video"),
+    .priv_class= &class,
 };

From 005db470115ebe2c973688bed9695356f487d674 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 21:31:06 +0100
Subject: [PATCH 029/830] mathops: remove ancient confusing comment

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mathops.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index 547bc1aa4f..d74bc1ed70 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -45,9 +45,6 @@
 #endif
 
 #ifndef MULH
-//gcc 3.4 creates an incredibly bloated mess out of this
-//#    define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32)
-
 static av_always_inline int MULH(int a, int b){
     return ((int64_t)(a) * (int64_t)(b))>>32;
 }

From 5dc65a3d0374ffd85e5ff1c89f5917d392897920 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 23 Apr 2011 19:55:59 +0200
Subject: [PATCH 030/830] lavfi: print key-frame and picture type information
 in ff_dlog_ref()

Signed-off-by: Stefano Sabatini <stefano.sabatini-lala@poste.it>
(cherry picked from commit f7bdffb09da597c5d6afff5359523370470ad072)
---
 libavfilter/avfilter.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index 82350d1790..02915036ab 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -237,11 +237,13 @@ static void ff_dlog_ref(void *ctx, AVFilterBufferRef *ref, int end)
             ref->pts, ref->pos);
 
     if (ref->video) {
-        av_dlog(ctx, " a:%d/%d s:%dx%d i:%c",
+        av_dlog(ctx, " a:%d/%d s:%dx%d i:%c iskey:%d type:%c",
                 ref->video->pixel_aspect.num, ref->video->pixel_aspect.den,
                 ref->video->w, ref->video->h,
                 !ref->video->interlaced     ? 'P' :         /* Progressive  */
-                ref->video->top_field_first ? 'T' : 'B');   /* Top / Bottom */
+                ref->video->top_field_first ? 'T' : 'B',    /* Top / Bottom */
+                ref->video->key_frame,
+                av_get_picture_type_char(ref->video->pict_type));
     }
     if (ref->audio) {
         av_dlog(ctx, " cl:%"PRId64"d sn:%d s:%d sr:%d p:%d",

From 3a7c977417f7904a6213048ed3e57dd79264d3d5 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 12 Apr 2011 12:06:49 +0200
Subject: [PATCH 031/830] ffplay: remove audio_write_get_buf_size() forward
 declaration

Move up the definition of audio_write_get_buf_size(), so that it is
defined before it is used. Simplify.
(cherry picked from commit 8776f3d22e401e30d17856e341f6cabbbefa92f7)
---
 ffplay.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/ffplay.c b/ffplay.c
index e820c603e3..a5dc358f72 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -218,7 +218,6 @@ typedef struct VideoState {
 } VideoState;
 
 static void show_help(void);
-static int audio_write_get_buf_size(VideoState *is);
 
 /* options specified by the user */
 static AVInputFormat *file_iformat;
@@ -768,6 +767,13 @@ static void video_image_display(VideoState *is)
     }
 }
 
+/* get the current audio output buffer size, in samples. With SDL, we
+   cannot have a precise information */
+static int audio_write_get_buf_size(VideoState *is)
+{
+    return is->audio_buf_size - is->audio_buf_index;
+}
+
 static inline int compute_mod(int a, int b)
 {
     a = a % b;
@@ -2146,14 +2152,6 @@ static int audio_decode_frame(VideoState *is, double *pts_ptr)
     }
 }
 
-/* get the current audio output buffer size, in samples. With SDL, we
-   cannot have a precise information */
-static int audio_write_get_buf_size(VideoState *is)
-{
-    return is->audio_buf_size - is->audio_buf_index;
-}
-
-
 /* prepare a new audio buffer */
 static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
 {

From 9d5fa6182dd2e41d5c174b29ef2a1a2f83a02d23 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 14 Feb 2011 23:02:10 +0100
Subject: [PATCH 032/830] Restructure video filter implementation in ffmpeg.c.

This fixes several bugs like multiple outputs and -aspect mixed with -vf

(cherry picked from commit 1762d9ced70ccc46c5d3e5d64e56a48d0fbbd4f7)
(cherry picked from commit 5c20c81bfa526b3a269db9c88b0c9007861f0917)
(cherry picked from commit a7844c580d83d8466c161a0e3979b3902d0d9100)
---
 ffmpeg.c | 139 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 80 insertions(+), 59 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 2a8a7d0a68..828e7f1414 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -161,7 +161,6 @@ static int loop_output = AVFMT_NOOUTPUTLOOP;
 static int qp_hist = 0;
 #if CONFIG_AVFILTER
 static char *vfilters = NULL;
-static AVFilterGraph *graph = NULL;
 #endif
 
 static int intra_only = 0;
@@ -291,6 +290,14 @@ typedef struct AVOutputStream {
     AVFifoBuffer *fifo;     /* for compression: one audio fifo per codec */
     FILE *logfile;
 
+#if CONFIG_AVFILTER
+    AVFilterContext *output_video_filter;
+    AVFilterContext *input_video_filter;
+    AVFilterBufferRef *picref;
+    char *avfilter;
+    AVFilterGraph *graph;
+#endif
+
    int sws_flags;
 } AVOutputStream;
 
@@ -314,11 +321,8 @@ typedef struct AVInputStream {
     int showed_multi_packet_warning;
     int is_past_recording_time;
 #if CONFIG_AVFILTER
-    AVFilterContext *output_video_filter;
-    AVFilterContext *input_video_filter;
     AVFrame *filter_frame;
     int has_filter_frame;
-    AVFilterBufferRef *picref;
 #endif
 } AVInputStream;
 
@@ -342,7 +346,7 @@ static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost)
     char args[255];
     int ret;
 
-    graph = avfilter_graph_alloc();
+    ost->graph = avfilter_graph_alloc();
 
     if (ist->st->sample_aspect_ratio.num){
         sample_aspect_ratio = ist->st->sample_aspect_ratio;
@@ -353,15 +357,15 @@ static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost)
              ist->st->codec->height, ist->st->codec->pix_fmt, 1, AV_TIME_BASE,
              sample_aspect_ratio.num, sample_aspect_ratio.den);
 
-    ret = avfilter_graph_create_filter(&ist->input_video_filter, avfilter_get_by_name("buffer"),
-                                       "src", args, NULL, graph);
+    ret = avfilter_graph_create_filter(&ost->input_video_filter, avfilter_get_by_name("buffer"),
+                                       "src", args, NULL, ost->graph);
     if (ret < 0)
         return ret;
-    ret = avfilter_graph_create_filter(&ist->output_video_filter, &ffsink,
-                                       "out", NULL, &ffsink_ctx, graph);
+    ret = avfilter_graph_create_filter(&ost->output_video_filter, &ffsink,
+                                       "out", NULL, &ffsink_ctx, ost->graph);
     if (ret < 0)
         return ret;
-    last_filter = ist->input_video_filter;
+    last_filter = ost->input_video_filter;
 
     if (codec->width  != icodec->width || codec->height != icodec->height) {
         snprintf(args, 255, "%d:%d:flags=0x%X",
@@ -369,7 +373,7 @@ static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost)
                  codec->height,
                  ost->sws_flags);
         if ((ret = avfilter_graph_create_filter(&filter, avfilter_get_by_name("scale"),
-                                                NULL, args, NULL, graph)) < 0)
+                                                NULL, args, NULL, ost->graph)) < 0)
             return ret;
         if ((ret = avfilter_link(last_filter, 0, filter, 0)) < 0)
             return ret;
@@ -377,9 +381,9 @@ static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost)
     }
 
     snprintf(args, sizeof(args), "flags=0x%X", ost->sws_flags);
-    graph->scale_sws_opts = av_strdup(args);
+    ost->graph->scale_sws_opts = av_strdup(args);
 
-    if (vfilters) {
+    if (ost->avfilter) {
         AVFilterInOut *outputs = av_malloc(sizeof(AVFilterInOut));
         AVFilterInOut *inputs  = av_malloc(sizeof(AVFilterInOut));
 
@@ -389,25 +393,25 @@ static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost)
         outputs->next    = NULL;
 
         inputs->name    = av_strdup("out");
-        inputs->filter_ctx = ist->output_video_filter;
+        inputs->filter_ctx = ost->output_video_filter;
         inputs->pad_idx = 0;
         inputs->next    = NULL;
 
-        if ((ret = avfilter_graph_parse(graph, vfilters, inputs, outputs, NULL)) < 0)
+        if ((ret = avfilter_graph_parse(ost->graph, ost->avfilter, inputs, outputs, NULL)) < 0)
             return ret;
-        av_freep(&vfilters);
+        av_freep(&ost->avfilter);
     } else {
-        if ((ret = avfilter_link(last_filter, 0, ist->output_video_filter, 0)) < 0)
+        if ((ret = avfilter_link(last_filter, 0, ost->output_video_filter, 0)) < 0)
             return ret;
     }
 
-    if ((ret = avfilter_graph_config(graph, NULL)) < 0)
+    if ((ret = avfilter_graph_config(ost->graph, NULL)) < 0)
         return ret;
 
-    codec->width  = ist->output_video_filter->inputs[0]->w;
-    codec->height = ist->output_video_filter->inputs[0]->h;
+    codec->width  = ost->output_video_filter->inputs[0]->w;
+    codec->height = ost->output_video_filter->inputs[0]->h;
     codec->sample_aspect_ratio = ost->st->sample_aspect_ratio =
-        ist->output_video_filter->inputs[0]->sample_aspect_ratio;
+        ost->output_video_filter->inputs[0]->sample_aspect_ratio;
 
     return 0;
 }
@@ -1549,14 +1553,21 @@ static int output_packet(AVInputStream *ist, int ist_index,
         }
 
 #if CONFIG_AVFILTER
-        if (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO && ist->input_video_filter) {
-            AVRational sar;
-            if (ist->st->sample_aspect_ratio.num) sar = ist->st->sample_aspect_ratio;
-            else                                  sar = ist->st->codec->sample_aspect_ratio;
-            // add it to be filtered
-            av_vsrc_buffer_add_frame(ist->input_video_filter, &picture,
-                                     ist->pts,
-                                     sar);
+        if (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
+            for (i = 0; i < nb_ostreams; i++) {
+                ost = ost_table[i];
+                if (ost->input_video_filter && ost->source_index == ist_index) {
+                    AVRational sar;
+                    if (ist->st->sample_aspect_ratio.num)
+                        sar = ist->st->sample_aspect_ratio;
+                    else
+                        sar = ist->st->codec->sample_aspect_ratio;
+                    // add it to be filtered
+                    av_vsrc_buffer_add_frame(ost->input_video_filter, &picture,
+                                             ist->pts,
+                                             sar);
+                }
+            }
         }
 #endif
 
@@ -1581,26 +1592,24 @@ static int output_packet(AVInputStream *ist, int ist_index,
             if (pts > now)
                 usleep(pts - now);
         }
-#if CONFIG_AVFILTER
-        frame_available = ist->st->codec->codec_type != AVMEDIA_TYPE_VIDEO ||
-            !ist->output_video_filter || avfilter_poll_frame(ist->output_video_filter->inputs[0]);
-#endif
         /* if output time reached then transcode raw format,
            encode packets and output them */
         if (start_time == 0 || ist->pts >= start_time)
-#if CONFIG_AVFILTER
-        while (frame_available) {
-            AVRational ist_pts_tb;
-            if (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO && ist->output_video_filter)
-                get_filtered_video_frame(ist->output_video_filter, &picture, &ist->picref, &ist_pts_tb);
-            if (ist->picref)
-                ist->pts = av_rescale_q(ist->picref->pts, ist_pts_tb, AV_TIME_BASE_Q);
-#endif
             for(i=0;i<nb_ostreams;i++) {
                 int frame_size;
 
                 ost = ost_table[i];
                 if (ost->source_index == ist_index) {
+#if CONFIG_AVFILTER
+                frame_available = ist->st->codec->codec_type != AVMEDIA_TYPE_VIDEO ||
+                    !ost->output_video_filter || avfilter_poll_frame(ost->output_video_filter->inputs[0]);
+                while (frame_available) {
+                    AVRational ist_pts_tb;
+                    if (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO && ost->output_video_filter)
+                        get_filtered_video_frame(ost->output_video_filter, &picture, &ost->picref, &ist_pts_tb);
+                    if (ost->picref)
+                        ist->pts = av_rescale_q(ost->picref->pts, ist_pts_tb, AV_TIME_BASE_Q);
+#endif
                     os = output_files[ost->file_index];
 
                     /* set the input output pts pairs */
@@ -1614,8 +1623,8 @@ static int output_packet(AVInputStream *ist, int ist_index,
                             break;
                         case AVMEDIA_TYPE_VIDEO:
 #if CONFIG_AVFILTER
-                            if (ist->picref->video)
-                                ost->st->codec->sample_aspect_ratio = ist->picref->video->pixel_aspect;
+                            if (ost->picref->video)
+                                ost->st->codec->sample_aspect_ratio = ost->picref->video->pixel_aspect;
 #endif
                             do_video_out(os, ost, ist, &picture, &frame_size);
                             if (vstats_filename && frame_size)
@@ -1636,7 +1645,11 @@ static int output_packet(AVInputStream *ist, int ist_index,
                         av_init_packet(&opkt);
 
                         if ((!ost->frame_number && !(pkt->flags & AV_PKT_FLAG_KEY)) && !copy_initial_nonkeyframes)
+#if !CONFIG_AVFILTER
                             continue;
+#else
+                            goto cont;
+#endif
 
                         /* no reencoding needed : output the packet directly */
                         /* force the input stream PTS */
@@ -1684,16 +1697,17 @@ static int output_packet(AVInputStream *ist, int ist_index,
                         ost->frame_number++;
                         av_free_packet(&opkt);
                     }
+#if CONFIG_AVFILTER
+                    cont:
+                    frame_available = (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) &&
+                                       ost->output_video_filter && avfilter_poll_frame(ost->output_video_filter->inputs[0]);
+                    if (ost->picref)
+                        avfilter_unref_buffer(ost->picref);
+                }
+#endif
                 }
             }
 
-#if CONFIG_AVFILTER
-            frame_available = (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) &&
-                              ist->output_video_filter && avfilter_poll_frame(ist->output_video_filter->inputs[0]);
-            if(ist->picref)
-                avfilter_unref_buffer(ist->picref);
-        }
-#endif
         av_free(buffer_to_free);
         /* XXX: allocate the subtitles in the codec ? */
         if (subtitle_to_free) {
@@ -2611,6 +2625,9 @@ static int transcode(AVFormatContext **output_files,
             av_freep(&ost->st->codec->stats_in);
             avcodec_close(ost->st->codec);
         }
+#if CONFIG_AVFILTER
+        avfilter_graph_free(&ost->graph);
+#endif
     }
 
     /* close each decoder */
@@ -2620,9 +2637,6 @@ static int transcode(AVFormatContext **output_files,
             avcodec_close(ist->st->codec);
         }
     }
-#if CONFIG_AVFILTER
-    avfilter_graph_free(&graph);
-#endif
 
     /* finished ! */
     ret = 0;
@@ -2765,12 +2779,6 @@ static void opt_frame_aspect_ratio(const char *arg)
         ffmpeg_exit(1);
     }
     frame_aspect_ratio = ar;
-
-#if CONFIG_AVFILTER
-    x = vfilters ? strlen(vfilters) : 0;
-    vfilters = av_realloc(vfilters, x+100);
-    snprintf(vfilters+x, x+100, "%csetdar=%f\n", x?',':' ', ar);
-#endif
 }
 
 static int opt_metadata(const char *opt, const char *arg)
@@ -3329,6 +3337,7 @@ static void new_video_stream(AVFormatContext *oc, int file_idx)
     AVCodecContext *video_enc;
     enum CodecID codec_id = CODEC_ID_NONE;
     AVCodec *codec= NULL;
+    int i;
 
     st = av_new_stream(oc, oc->nb_streams < nb_streamid_map ? streamid_map[oc->nb_streams] : 0);
     if (!st) {
@@ -3348,6 +3357,18 @@ static void new_video_stream(AVFormatContext *oc, int file_idx)
             codec_id = av_guess_codec(oc->oformat, NULL, oc->filename, NULL, AVMEDIA_TYPE_VIDEO);
             codec = avcodec_find_encoder(codec_id);
         }
+
+#if CONFIG_AVFILTER
+        if (frame_aspect_ratio > 0){
+            i = vfilters ? strlen(vfilters) : 0;
+            vfilters = av_realloc(vfilters, i+100);
+            snprintf(vfilters+i, i+100, "%csetdar=%f\n", i?',':' ', frame_aspect_ratio);
+            frame_aspect_ratio=0;
+        }
+
+        ost->avfilter= vfilters;
+        vfilters = NULL;
+#endif
     }
 
     avcodec_get_context_defaults3(st->codec, codec);

From 901ff51116f831c9082e14c80c7481dd3999aa30 Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Fri, 1 Apr 2011 17:30:45 +0200
Subject: [PATCH 033/830] ffmpeg: fix -aspect cli option

Redesign the way -aspect option is handled. This is done by making
ffmpeg read the sample aspect ratio set in the corresponding input
stream by default, and overriding it using the value specified by
-aspect.

If the output display aspect ratio is specified with -aspect, it is
set at the end of the filterchain, thus overriding the value set by
filters in the filterchain.

This implementation is more robust, since it does not modify the
filterchain description (which was creating potential syntax errors).

(Cherry-pick abf8342aa94bdf06bb324f6723a6743dd628d5c6)

Another aspect ratio fix try. This leaves the setdar addition at the end
(preferred by people).

(Cherry-pick e7c7b0d000e81d24327602e04d8fed400dbb7193)
---
 ffmpeg.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 828e7f1414..c99c4de024 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -274,6 +274,8 @@ typedef struct AVOutputStream {
     int resample_width;
     int resample_pix_fmt;
 
+    float frame_aspect_ratio;
+
     /* forced key frames */
     int64_t *forced_kf_pts;
     int forced_kf_count;
@@ -411,6 +413,8 @@ static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost)
     codec->width  = ost->output_video_filter->inputs[0]->w;
     codec->height = ost->output_video_filter->inputs[0]->h;
     codec->sample_aspect_ratio = ost->st->sample_aspect_ratio =
+        ost->frame_aspect_ratio ? // overriden by the -aspect cli option
+        av_d2q(ost->frame_aspect_ratio*codec->height/codec->width, 255) :
         ost->output_video_filter->inputs[0]->sample_aspect_ratio;
 
     return 0;
@@ -1623,7 +1627,7 @@ static int output_packet(AVInputStream *ist, int ist_index,
                             break;
                         case AVMEDIA_TYPE_VIDEO:
 #if CONFIG_AVFILTER
-                            if (ost->picref->video)
+                            if (ost->picref->video && !ost->frame_aspect_ratio)
                                 ost->st->codec->sample_aspect_ratio = ost->picref->video->pixel_aspect;
 #endif
                             do_video_out(os, ost, ist, &picture, &frame_size);
@@ -2132,6 +2136,13 @@ static int transcode(AVFormatContext **output_files,
                 codec->width = icodec->width;
                 codec->height = icodec->height;
                 codec->has_b_frames = icodec->has_b_frames;
+                if (!codec->sample_aspect_ratio.num) {
+                    codec->sample_aspect_ratio =
+                    ost->st->sample_aspect_ratio =
+                        ist->st->sample_aspect_ratio.num ? ist->st->sample_aspect_ratio :
+                        ist->st->codec->sample_aspect_ratio.num ?
+                        ist->st->codec->sample_aspect_ratio : (AVRational){0, 1};
+                }
                 break;
             case AVMEDIA_TYPE_SUBTITLE:
                 codec->width = icodec->width;
@@ -3220,11 +3231,6 @@ static void opt_input_file(const char *filename)
             set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_VIDEO], AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM, input_codecs[nb_input_codecs-1]);
             frame_height = dec->height;
             frame_width  = dec->width;
-            if(ic->streams[i]->sample_aspect_ratio.num)
-                frame_aspect_ratio=av_q2d(ic->streams[i]->sample_aspect_ratio);
-            else
-                frame_aspect_ratio=av_q2d(dec->sample_aspect_ratio);
-            frame_aspect_ratio *= (float) dec->width / dec->height;
             frame_pix_fmt = dec->pix_fmt;
             rfps      = ic->streams[i]->r_frame_rate.num;
             rfps_base = ic->streams[i]->r_frame_rate.den;
@@ -3337,7 +3343,6 @@ static void new_video_stream(AVFormatContext *oc, int file_idx)
     AVCodecContext *video_enc;
     enum CodecID codec_id = CODEC_ID_NONE;
     AVCodec *codec= NULL;
-    int i;
 
     st = av_new_stream(oc, oc->nb_streams < nb_streamid_map ? streamid_map[oc->nb_streams] : 0);
     if (!st) {
@@ -3358,14 +3363,9 @@ static void new_video_stream(AVFormatContext *oc, int file_idx)
             codec = avcodec_find_encoder(codec_id);
         }
 
+        ost->frame_aspect_ratio = frame_aspect_ratio;
+        frame_aspect_ratio = 0;
 #if CONFIG_AVFILTER
-        if (frame_aspect_ratio > 0){
-            i = vfilters ? strlen(vfilters) : 0;
-            vfilters = av_realloc(vfilters, i+100);
-            snprintf(vfilters+i, i+100, "%csetdar=%f\n", i?',':' ', frame_aspect_ratio);
-            frame_aspect_ratio=0;
-        }
-
         ost->avfilter= vfilters;
         vfilters = NULL;
 #endif
@@ -3412,7 +3412,6 @@ static void new_video_stream(AVFormatContext *oc, int file_idx)
 
         video_enc->width = frame_width;
         video_enc->height = frame_height;
-        video_enc->sample_aspect_ratio = av_d2q(frame_aspect_ratio*video_enc->height/video_enc->width, 255);
         video_enc->pix_fmt = frame_pix_fmt;
         st->sample_aspect_ratio = video_enc->sample_aspect_ratio;
 

From ce207e050e38352541531e2f09d62c2f54680063 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 16 May 2011 20:11:50 +0200
Subject: [PATCH 034/830] drawtext: fix strftime() text expansion

The feature was dropped after the filter was partially rewritten and
recommitted.

Fix issue #207.
---
 libavfilter/vf_drawtext.c | 45 +++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
index b26029bb8f..3013cc74bf 100644
--- a/libavfilter/vf_drawtext.c
+++ b/libavfilter/vf_drawtext.c
@@ -49,9 +49,11 @@ typedef struct {
     const AVClass *class;
     uint8_t *fontfile;              ///< font to be used
     uint8_t *text;                  ///< text to be drawn
-    uint8_t *text_priv;             ///< used to detect whether text changed
+    uint8_t *expanded_text;         ///< used to contain the strftime()-expanded text
+    size_t   expanded_text_size;    ///< size in bytes of the expanded_text buffer
     int ft_load_flags;              ///< flags used for loading fonts, see FT_LOAD_*
     FT_Vector *positions;           ///< positions for each element in the text
+    size_t nb_positions;            ///< number of elements of positions array
     char *textfile;                 ///< file with text to be drawn
     unsigned int x;                 ///< x position to start drawing text
     unsigned int y;                 ///< y position to start drawing text
@@ -349,6 +351,7 @@ static av_cold void uninit(AVFilterContext *ctx)
 
     av_freep(&dtext->fontfile);
     av_freep(&dtext->text);
+    av_freep(&dtext->expanded_text);
     av_freep(&dtext->fontcolor_string);
     av_freep(&dtext->boxcolor_string);
     av_freep(&dtext->positions);
@@ -517,7 +520,7 @@ static inline int is_newline(uint32_t c)
 static int draw_glyphs(DrawTextContext *dtext, AVFilterBufferRef *picref,
                        int width, int height, const uint8_t rgbcolor[4], const uint8_t yuvcolor[4], int x, int y)
 {
-    char *text = dtext->text;
+    char *text = HAVE_LOCALTIME_R ? dtext->expanded_text : dtext->text;
     uint32_t code = 0;
     int i;
     uint8_t *p;
@@ -559,45 +562,51 @@ static int draw_text(AVFilterContext *ctx, AVFilterBufferRef *picref,
     uint32_t code = 0, prev_code = 0;
     int x = 0, y = 0, i = 0, ret;
     int text_height, baseline;
+    char *text = dtext->text;
     uint8_t *p;
-    int str_w = 0;
+    int str_w = 0, len;
     int y_min = 32000, y_max = -32000;
     FT_Vector delta;
     Glyph *glyph = NULL, *prev_glyph = NULL;
     Glyph dummy = { 0 };
 
-    if (dtext->text != dtext->text_priv) {
 #if HAVE_LOCALTIME_R
         time_t now = time(0);
         struct tm ltime;
-        uint8_t *buf = NULL;
-        int     buflen = 2*strlen(dtext->text) + 1, len;
+        uint8_t *buf = dtext->expanded_text;
+        int buf_size = dtext->expanded_text_size;
+
+        if (!buf) {
+            buf_size = 2*strlen(dtext->text)+1;
+            buf = av_malloc(buf_size);
+        }
 
         localtime_r(&now, &ltime);
 
-        while ((buf = av_realloc(buf, buflen))) {
+        do {
             *buf = 1;
-            if ((len = strftime(buf, buflen, dtext->text, &ltime)) != 0 || *buf == 0)
+            if (strftime(buf, buf_size, dtext->text, &ltime) != 0 || *buf == 0)
                 break;
-            buflen *= 2;
-        }
+            buf_size *= 2;
+        } while ((buf = av_realloc(buf, buf_size)));
+
         if (!buf)
             return AVERROR(ENOMEM);
-        av_freep(&dtext->text);
-        dtext->text = dtext->text_priv = buf;
-#else
-        dtext->text_priv = dtext->text;
+        text = dtext->expanded_text = buf;
+        dtext->expanded_text_size = buf_size;
 #endif
-        if (!(dtext->positions = av_realloc(dtext->positions,
-                                            strlen(dtext->text)*sizeof(*dtext->positions))))
+    if ((len = strlen(text)) > dtext->nb_positions) {
+        if (!(dtext->positions =
+              av_realloc(dtext->positions, len*sizeof(*dtext->positions))))
             return AVERROR(ENOMEM);
+        dtext->nb_positions = len;
     }
 
     x = dtext->x;
     y = dtext->y;
 
     /* load and cache glyphs */
-    for (i = 0, p = dtext->text; *p; i++) {
+    for (i = 0, p = text; *p; i++) {
         GET_UTF8(code, *p++, continue;);
 
         /* get glyph */
@@ -614,7 +623,7 @@ static int draw_text(AVFilterContext *ctx, AVFilterBufferRef *picref,
 
     /* compute and save position for each glyph */
     glyph = NULL;
-    for (i = 0, p = dtext->text; *p; i++) {
+    for (i = 0, p = text; *p; i++) {
         GET_UTF8(code, *p++, continue;);
 
         /* skip the \n in the sequence \r\n */

From e8ea9c21790660256942c66a74bf992287f8bb7b Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 16 May 2011 23:48:00 +0200
Subject: [PATCH 035/830] drawtext: reindent after the previous commit

---
 libavfilter/vf_drawtext.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
index 3013cc74bf..4f25140130 100644
--- a/libavfilter/vf_drawtext.c
+++ b/libavfilter/vf_drawtext.c
@@ -571,29 +571,29 @@ static int draw_text(AVFilterContext *ctx, AVFilterBufferRef *picref,
     Glyph dummy = { 0 };
 
 #if HAVE_LOCALTIME_R
-        time_t now = time(0);
-        struct tm ltime;
-        uint8_t *buf = dtext->expanded_text;
-        int buf_size = dtext->expanded_text_size;
+    time_t now = time(0);
+    struct tm ltime;
+    uint8_t *buf = dtext->expanded_text;
+    int buf_size = dtext->expanded_text_size;
 
-        if (!buf) {
-            buf_size = 2*strlen(dtext->text)+1;
-            buf = av_malloc(buf_size);
-        }
+    if (!buf) {
+        buf_size = 2*strlen(dtext->text)+1;
+        buf = av_malloc(buf_size);
+    }
 
-        localtime_r(&now, &ltime);
+    localtime_r(&now, &ltime);
 
-        do {
-            *buf = 1;
-            if (strftime(buf, buf_size, dtext->text, &ltime) != 0 || *buf == 0)
-                break;
-            buf_size *= 2;
-        } while ((buf = av_realloc(buf, buf_size)));
+    do {
+        *buf = 1;
+        if (strftime(buf, buf_size, dtext->text, &ltime) != 0 || *buf == 0)
+            break;
+        buf_size *= 2;
+    } while ((buf = av_realloc(buf, buf_size)));
 
-        if (!buf)
-            return AVERROR(ENOMEM);
-        text = dtext->expanded_text = buf;
-        dtext->expanded_text_size = buf_size;
+    if (!buf)
+        return AVERROR(ENOMEM);
+    text = dtext->expanded_text = buf;
+    dtext->expanded_text_size = buf_size;
 #endif
     if ((len = strlen(text)) > dtext->nb_positions) {
         if (!(dtext->positions =

From d8c7a216024e1408a1f865227cdfd371ddbe1d59 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 16 May 2011 23:44:35 +0200
Subject: [PATCH 036/830] drawtext: specify union type for setting default
 options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix warnings of the type:
vf_drawtext.c:NNN: warning: missing braces around initializer
vf_drawtext.c:NNN: warning: (near initialization for ‘drawtext_options[X].default_val’)
---
 libavfilter/vf_drawtext.c | 58 +++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
index 4f25140130..cf0eb43344 100644
--- a/libavfilter/vf_drawtext.c
+++ b/libavfilter/vf_drawtext.c
@@ -86,37 +86,37 @@ typedef struct {
 #define OFFSET(x) offsetof(DrawTextContext, x)
 
 static const AVOption drawtext_options[]= {
-{"fontfile", "set font file",        OFFSET(fontfile),         FF_OPT_TYPE_STRING, 0,  CHAR_MIN, CHAR_MAX },
-{"text",     "set text",             OFFSET(text),             FF_OPT_TYPE_STRING, 0,  CHAR_MIN, CHAR_MAX },
-{"textfile", "set text file",        OFFSET(textfile),         FF_OPT_TYPE_STRING, 0,  CHAR_MIN, CHAR_MAX },
-{"fontcolor","set foreground color", OFFSET(fontcolor_string), FF_OPT_TYPE_STRING, 0,  CHAR_MIN, CHAR_MAX },
-{"boxcolor", "set box color",        OFFSET(boxcolor_string),  FF_OPT_TYPE_STRING, 0,  CHAR_MIN, CHAR_MAX },
-{"shadowcolor", "set shadow color",  OFFSET(shadowcolor_string),  FF_OPT_TYPE_STRING, 0,  CHAR_MIN, CHAR_MAX },
-{"box",      "set box",              OFFSET(draw_box),         FF_OPT_TYPE_INT,    0,         0,        1 },
-{"fontsize", "set font size",        OFFSET(fontsize),         FF_OPT_TYPE_INT,   16,         1,       72 },
-{"x",        "set x",                OFFSET(x),                FF_OPT_TYPE_INT,    0,         0,  INT_MAX },
-{"y",        "set y",                OFFSET(y),                FF_OPT_TYPE_INT,    0,         0,  INT_MAX },
-{"shadowx",  "set x",                OFFSET(shadowx),          FF_OPT_TYPE_INT,    0,   INT_MIN,  INT_MAX },
-{"shadowy",  "set y",                OFFSET(shadowy),          FF_OPT_TYPE_INT,    0,   INT_MIN,  INT_MAX },
-{"tabsize",  "set tab size",         OFFSET(tabsize),          FF_OPT_TYPE_INT,    4,         0,  INT_MAX },
+{"fontfile", "set font file",        OFFSET(fontfile),           FF_OPT_TYPE_STRING, {.str=NULL},  CHAR_MIN, CHAR_MAX },
+{"text",     "set text",             OFFSET(text),               FF_OPT_TYPE_STRING, {.str=NULL},  CHAR_MIN, CHAR_MAX },
+{"textfile", "set text file",        OFFSET(textfile),           FF_OPT_TYPE_STRING, {.str=NULL},  CHAR_MIN, CHAR_MAX },
+{"fontcolor","set foreground color", OFFSET(fontcolor_string),   FF_OPT_TYPE_STRING, {.str=NULL},  CHAR_MIN, CHAR_MAX },
+{"boxcolor", "set box color",        OFFSET(boxcolor_string),    FF_OPT_TYPE_STRING, {.str=NULL},  CHAR_MIN, CHAR_MAX },
+{"shadowcolor", "set shadow color",  OFFSET(shadowcolor_string), FF_OPT_TYPE_STRING, {.str=NULL},  CHAR_MIN, CHAR_MAX },
+{"box",      "set box",              OFFSET(draw_box),           FF_OPT_TYPE_INT,    {.dbl=0},     0,        1        },
+{"fontsize", "set font size",        OFFSET(fontsize),           FF_OPT_TYPE_INT,    {.dbl=16},    1,        72       },
+{"x",        "set x",                OFFSET(x),                  FF_OPT_TYPE_INT,    {.dbl=0},     0,        INT_MAX  },
+{"y",        "set y",                OFFSET(y),                  FF_OPT_TYPE_INT,    {.dbl=0},     0,        INT_MAX  },
+{"shadowx",  "set x",                OFFSET(shadowx),            FF_OPT_TYPE_INT,    {.dbl=0},     INT_MIN,  INT_MAX  },
+{"shadowy",  "set y",                OFFSET(shadowy),            FF_OPT_TYPE_INT,    {.dbl=0},     INT_MIN,  INT_MAX  },
+{"tabsize",  "set tab size",         OFFSET(tabsize),            FF_OPT_TYPE_INT,    {.dbl=4},     0,        INT_MAX  },
 
 /* FT_LOAD_* flags */
-{"ft_load_flags", "set font loading flags for libfreetype",   OFFSET(ft_load_flags),  FF_OPT_TYPE_FLAGS,  FT_LOAD_DEFAULT|FT_LOAD_RENDER, 0, INT_MAX, 0, "ft_load_flags" },
-{"default",                     "set default",                     0, FF_OPT_TYPE_CONST, FT_LOAD_DEFAULT,                     INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"no_scale",                    "set no_scale",                    0, FF_OPT_TYPE_CONST, FT_LOAD_NO_SCALE,                    INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"no_hinting",                  "set no_hinting",                  0, FF_OPT_TYPE_CONST, FT_LOAD_NO_HINTING,                  INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"render",                      "set render",                      0, FF_OPT_TYPE_CONST, FT_LOAD_RENDER,                      INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"no_bitmap",                   "set no_bitmap",                   0, FF_OPT_TYPE_CONST, FT_LOAD_NO_BITMAP,                   INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"vertical_layout",             "set vertical_layout",             0, FF_OPT_TYPE_CONST, FT_LOAD_VERTICAL_LAYOUT,             INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"force_autohint",              "set force_autohint",              0, FF_OPT_TYPE_CONST, FT_LOAD_FORCE_AUTOHINT,              INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"crop_bitmap",                 "set crop_bitmap",                 0, FF_OPT_TYPE_CONST, FT_LOAD_CROP_BITMAP,                 INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"pedantic",                    "set pedantic",                    0, FF_OPT_TYPE_CONST, FT_LOAD_PEDANTIC,                    INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"ignore_global_advance_width", "set ignore_global_advance_width", 0, FF_OPT_TYPE_CONST, FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH, INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"no_recurse",                  "set no_recurse",                  0, FF_OPT_TYPE_CONST, FT_LOAD_NO_RECURSE,                  INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"ignore_transform",            "set ignore_transform",            0, FF_OPT_TYPE_CONST, FT_LOAD_IGNORE_TRANSFORM,            INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"monochrome",                  "set monochrome",                  0, FF_OPT_TYPE_CONST, FT_LOAD_MONOCHROME,                  INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"linear_design",               "set linear_design",               0, FF_OPT_TYPE_CONST, FT_LOAD_LINEAR_DESIGN,               INT_MIN, INT_MAX, 0, "ft_load_flags" },
-{"no_autohint",                 "set no_autohint",                 0, FF_OPT_TYPE_CONST, FT_LOAD_NO_AUTOHINT,                 INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"ft_load_flags", "set font loading flags for libfreetype",   OFFSET(ft_load_flags),  FF_OPT_TYPE_FLAGS,  {.dbl=FT_LOAD_DEFAULT|FT_LOAD_RENDER}, 0, INT_MAX, 0, "ft_load_flags" },
+{"default",                     "set default",                     0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_DEFAULT},                     INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"no_scale",                    "set no_scale",                    0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_NO_SCALE},                    INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"no_hinting",                  "set no_hinting",                  0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_NO_HINTING},                  INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"render",                      "set render",                      0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_RENDER},                      INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"no_bitmap",                   "set no_bitmap",                   0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_NO_BITMAP},                   INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"vertical_layout",             "set vertical_layout",             0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_VERTICAL_LAYOUT},             INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"force_autohint",              "set force_autohint",              0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_FORCE_AUTOHINT},              INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"crop_bitmap",                 "set crop_bitmap",                 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_CROP_BITMAP},                 INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"pedantic",                    "set pedantic",                    0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_PEDANTIC},                    INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"ignore_global_advance_width", "set ignore_global_advance_width", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH}, INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"no_recurse",                  "set no_recurse",                  0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_NO_RECURSE},                  INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"ignore_transform",            "set ignore_transform",            0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_IGNORE_TRANSFORM},            INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"monochrome",                  "set monochrome",                  0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_MONOCHROME},                  INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"linear_design",               "set linear_design",               0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_LINEAR_DESIGN},               INT_MIN, INT_MAX, 0, "ft_load_flags" },
+{"no_autohint",                 "set no_autohint",                 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_NO_AUTOHINT},                 INT_MIN, INT_MAX, 0, "ft_load_flags" },
 {NULL},
 };
 

From 0d73227c712eca2247e06d7a3c2daeb6f6f2a128 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 13 May 2011 16:27:35 +0200
Subject: [PATCH 037/830] imgutils: make av_image_get_linesize() return
 AVERROR(EINVAL) for invalid pixel formats

---
 libavutil/imgutils.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
index 0df8de4255..3103efbe4a 100644
--- a/libavutil/imgutils.c
+++ b/libavutil/imgutils.c
@@ -50,6 +50,9 @@ int av_image_get_linesize(enum PixelFormat pix_fmt, int width, int plane)
     int max_step_comp[4];       /* the component for each plane which has the max pixel step */
     int s, linesize;
 
+    if ((unsigned)pix_fmt >= PIX_FMT_NB || desc->flags & PIX_FMT_HWACCEL)
+        return AVERROR(EINVAL);
+
     av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
     s = (max_step_comp[plane] == 1 || max_step_comp[plane] == 2) ? desc->log2_chroma_w : 0;
     linesize = max_step[plane] * (((width + (1 << s) - 1)) >> s);

From c0170d09738c74280af78c6f64914c52a9b6e075 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 15 May 2011 12:47:06 +0200
Subject: [PATCH 038/830] imgutils: introduce internal image_get_linesize() and
 use it

Factor the code shared by av_image_get_linesize() and
av_image_fill_linesizes() into a common helper, and add missing checks.

Increase robustness.
---
 libavutil/imgutils.c | 46 +++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
index 3103efbe4a..8581150771 100644
--- a/libavutil/imgutils.c
+++ b/libavutil/imgutils.c
@@ -43,27 +43,41 @@ void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
     }
 }
 
-int av_image_get_linesize(enum PixelFormat pix_fmt, int width, int plane)
+static inline
+int image_get_linesize(int width, int plane,
+                       int max_step, int max_step_comp,
+                       const AVPixFmtDescriptor *desc)
 {
-    const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
-    int max_step     [4];       /* max pixel step for each plane */
-    int max_step_comp[4];       /* the component for each plane which has the max pixel step */
-    int s, linesize;
+    int s, shifted_w, linesize;
 
-    if ((unsigned)pix_fmt >= PIX_FMT_NB || desc->flags & PIX_FMT_HWACCEL)
+    if (width < 0)
         return AVERROR(EINVAL);
-
-    av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
-    s = (max_step_comp[plane] == 1 || max_step_comp[plane] == 2) ? desc->log2_chroma_w : 0;
-    linesize = max_step[plane] * (((width + (1 << s) - 1)) >> s);
+    s = (max_step_comp == 1 || max_step_comp == 2) ? desc->log2_chroma_w : 0;
+    shifted_w = ((width + (1 << s) - 1)) >> s;
+    if (shifted_w && max_step > INT_MAX / shifted_w)
+        return AVERROR(EINVAL);
+    linesize = max_step * shifted_w;
     if (desc->flags & PIX_FMT_BITSTREAM)
         linesize = (linesize + 7) >> 3;
     return linesize;
 }
 
+int av_image_get_linesize(enum PixelFormat pix_fmt, int width, int plane)
+{
+    const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
+    int max_step     [4];       /* max pixel step for each plane */
+    int max_step_comp[4];       /* the component for each plane which has the max pixel step */
+
+    if ((unsigned)pix_fmt >= PIX_FMT_NB || desc->flags & PIX_FMT_HWACCEL)
+        return AVERROR(EINVAL);
+
+    av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
+    return image_get_linesize(width, plane, max_step[plane], max_step_comp[plane], desc);
+}
+
 int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width)
 {
-    int i;
+    int i, ret;
     const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
     int max_step     [4];       /* max pixel step for each plane */
     int max_step_comp[4];       /* the component for each plane which has the max pixel step */
@@ -75,13 +89,9 @@ int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
 
     av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
     for (i = 0; i < 4; i++) {
-        int s = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
-        int shifted_w = ((width + (1 << s) - 1)) >> s;
-        if (max_step[i] > INT_MAX / shifted_w)
-            return AVERROR(EINVAL);
-        linesizes[i] = max_step[i] * shifted_w;
-        if (desc->flags & PIX_FMT_BITSTREAM)
-            linesizes[i] = (linesizes[i] + 7) >> 3;
+        if ((ret = image_get_linesize(width, i, max_step[i], max_step_comp[i], desc)) < 0)
+            return ret;
+        linesizes[i] = ret;
     }
 
     return 0;

From 1af99b0292b7e27f989ecf817a09e2b29976490f Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 10 May 2011 15:33:36 +0200
Subject: [PATCH 039/830] iff: distinguish fields for audio and video
 compression

Create separate fields svx8_compression (for audio compression) and
bitmap_compression (for video compression), and perform minor related
logging tweaks.

Improve clarity, also simplify the case when both types of compression
are employed in the same file.
---
 libavformat/iff.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/libavformat/iff.c b/libavformat/iff.c
index da4e858501..54226dc3de 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -88,7 +88,8 @@ typedef struct {
     uint32_t  body_size;
     uint32_t  sent_bytes;
     uint32_t  audio_frame_count;
-    unsigned  compression;  ///< delta compression method used
+    svx8_compression_type   svx8_compression;
+    bitmap_compression_type bitmap_compression;  ///< delta compression method used
     unsigned  bpp;          ///< bits per plane to decode (differs from bits_per_coded_sample if HAM)
     unsigned  ham;          ///< 0 if non-HAM or number of hold bits (6 for bpp > 6, 4 otherwise)
     unsigned  flags;        ///< 1 for EHB, 0 is no extra half darkening
@@ -146,7 +147,6 @@ static int iff_read_header(AVFormatContext *s,
     AVStream *st;
     uint8_t *buf;
     uint32_t chunk_id, data_size;
-    int compression = -1;
     uint32_t screenmode = 0;
     unsigned transparency = 0;
     unsigned masking = 0; // no mask
@@ -178,7 +178,7 @@ static int iff_read_header(AVFormatContext *s,
             st->codec->sample_rate = avio_rb16(pb);
             if (data_size >= 16) {
                 avio_skip(pb, 1);
-                compression        = avio_r8(pb);
+                iff->svx8_compression = avio_r8(pb);
             }
             break;
 
@@ -209,6 +209,7 @@ static int iff_read_header(AVFormatContext *s,
             break;
 
         case ID_BMHD:
+            iff->bitmap_compression = -1;
             st->codec->codec_type            = AVMEDIA_TYPE_VIDEO;
             if (data_size <= 8)
                 return AVERROR_INVALIDDATA;
@@ -219,7 +220,7 @@ static int iff_read_header(AVFormatContext *s,
             if (data_size >= 10)
                 masking                      = avio_r8(pb);
             if (data_size >= 11)
-                compression                  = avio_r8(pb);
+                iff->bitmap_compression      = avio_r8(pb);
             if (data_size >= 14) {
                 avio_skip(pb, 1); // padding
                 transparency                 = avio_rb16(pb);
@@ -263,7 +264,7 @@ static int iff_read_header(AVFormatContext *s,
     case AVMEDIA_TYPE_AUDIO:
         av_set_pts_info(st, 32, 1, st->codec->sample_rate);
 
-        switch(compression) {
+        switch (iff->svx8_compression) {
         case COMP_NONE:
             st->codec->codec_id = CODEC_ID_PCM_S8;
             break;
@@ -274,7 +275,8 @@ static int iff_read_header(AVFormatContext *s,
             st->codec->codec_id = CODEC_ID_8SVX_EXP;
             break;
         default:
-            av_log(s, AV_LOG_ERROR, "unknown compression method\n");
+            av_log(s, AV_LOG_ERROR,
+                   "Unknown SVX8 compression method '%d'\n", iff->svx8_compression);
             return -1;
         }
 
@@ -284,7 +286,6 @@ static int iff_read_header(AVFormatContext *s,
         break;
 
     case AVMEDIA_TYPE_VIDEO:
-        iff->compression  = compression;
         iff->bpp          = st->codec->bits_per_coded_sample;
         if ((screenmode & 0x800 /* Hold And Modify */) && iff->bpp <= 8) {
             iff->ham      = iff->bpp > 6 ? 6 : 4;
@@ -302,14 +303,14 @@ static int iff_read_header(AVFormatContext *s,
         }
         buf = st->codec->extradata;
         bytestream_put_be16(&buf, IFF_EXTRA_VIDEO_SIZE);
-        bytestream_put_byte(&buf, iff->compression);
+        bytestream_put_byte(&buf, iff->bitmap_compression);
         bytestream_put_byte(&buf, iff->bpp);
         bytestream_put_byte(&buf, iff->ham);
         bytestream_put_byte(&buf, iff->flags);
         bytestream_put_be16(&buf, iff->transparency);
         bytestream_put_byte(&buf, iff->masking);
 
-        switch (compression) {
+        switch (iff->bitmap_compression) {
         case BITMAP_RAW:
             st->codec->codec_id = CODEC_ID_IFF_ILBM;
             break;
@@ -317,7 +318,8 @@ static int iff_read_header(AVFormatContext *s,
             st->codec->codec_id = CODEC_ID_IFF_BYTERUN1;
             break;
         default:
-            av_log(s, AV_LOG_ERROR, "unknown compression method\n");
+            av_log(s, AV_LOG_ERROR,
+                   "Unknown bitmap compression method '%d'\n", iff->bitmap_compression);
             return AVERROR_INVALIDDATA;
         }
         break;

From 34f590b29bc427036b6cb030bcc23d120316c20f Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 10 May 2011 15:40:03 +0200
Subject: [PATCH 040/830] iff: fix bitrate computation for compressed audio
 stream

---
 libavformat/iff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/iff.c b/libavformat/iff.c
index 54226dc3de..b57f0ee279 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -280,7 +280,7 @@ static int iff_read_header(AVFormatContext *s,
             return -1;
         }
 
-        st->codec->bits_per_coded_sample = 8;
+        st->codec->bits_per_coded_sample = iff->svx8_compression == COMP_NONE ? 8 : 4;
         st->codec->bit_rate = st->codec->channels * st->codec->sample_rate * st->codec->bits_per_coded_sample;
         st->codec->block_align = st->codec->channels * st->codec->bits_per_coded_sample;
         break;

From d8353256a3ae9bbf76b9b080884b82566c4938ab Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 13 May 2011 23:28:18 +0200
Subject: [PATCH 041/830] iff: compact code setting metadata tags

Ease readability.
---
 libavformat/iff.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/libavformat/iff.c b/libavformat/iff.c
index b57f0ee279..b092de0b08 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -232,21 +232,10 @@ static int iff_read_header(AVFormatContext *s,
             break;
 
         case ID_ANNO:
-        case ID_TEXT:
-            metadata_tag = "comment";
-            break;
-
-        case ID_AUTH:
-            metadata_tag = "artist";
-            break;
-
-        case ID_COPYRIGHT:
-            metadata_tag = "copyright";
-            break;
-
-        case ID_NAME:
-            metadata_tag = "title";
-            break;
+        case ID_TEXT:      metadata_tag = "comment";   break;
+        case ID_AUTH:      metadata_tag = "artist";    break;
+        case ID_COPYRIGHT: metadata_tag = "copyright"; break;
+        case ID_NAME:      metadata_tag = "title";     break;
         }
 
         if (metadata_tag) {

From e280a4da2ae6fd44f0079358ecc5aa08e388a5ed Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 15 May 2011 13:24:46 +0200
Subject: [PATCH 042/830] iff/8svx: redesign 8SVX demuxing and decoding for
 handling stereo samples correctly

Make the iff demuxer send the whole audio chunk to the decoder as a
single packet, move stereo interleaving from the iff demuxer to the
decoder, and introduce an 8svx_raw decoder which performs
stereo interleaving.

This is required for handling stereo data correctly, since samples
are stored like:
LLLLLL....RRRRRR

that is, all left samples are at the beginning of the chunk and all
right samples at the end, so it is necessary to store and process the
whole buffer in order to decode each frame. Thus the decoder needs the
whole audio chunk before it can return interleaved data.

Fix decoding of files 8svx_exp.iff and 8svx_fib.iff, fix trac issue #169.
---
 libavcodec/8svx.c            | 179 ++++++++++++++++++++++++++++-------
 libavcodec/Makefile          |   1 +
 libavcodec/allcodecs.c       |   1 +
 libavcodec/avcodec.h         |   1 +
 libavcodec/version.h         |   2 +-
 libavformat/iff.c            |  40 +-------
 tests/ref/fate/iff-fibonacci |   2 +-
 7 files changed, 152 insertions(+), 74 deletions(-)

diff --git a/libavcodec/8svx.c b/libavcodec/8svx.c
index 4f95d9034e..5d94e005a2 100644
--- a/libavcodec/8svx.c
+++ b/libavcodec/8svx.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2008 Jaikrishnan Menon
+ * Copyright (C) 2011 Stefano Sabatini
  *
  * This file is part of FFmpeg.
  *
@@ -38,62 +39,155 @@
 
 /** decoder context */
 typedef struct EightSvxContext {
-    int16_t fib_acc;
-    const int16_t *table;
+    const int8_t *table;
+
+    /* buffer holding the whole decoded and interleaved audio chunk;
+     * it is allocated and filled when the first packet is decoded */
+    uint8_t *samples;
+    size_t samples_size;
+    int samples_idx;
 } EightSvxContext;
 
-static const int16_t fibonacci[16]   = { -34<<8, -21<<8, -13<<8,  -8<<8, -5<<8, -3<<8, -2<<8, -1<<8,
-                                          0, 1<<8, 2<<8, 3<<8, 5<<8, 8<<8, 13<<8, 21<<8 };
-static const int16_t exponential[16] = { -128<<8, -64<<8, -32<<8, -16<<8, -8<<8, -4<<8, -2<<8, -1<<8,
-                                          0, 1<<8, 2<<8, 4<<8, 8<<8, 16<<8, 32<<8, 64<<8 };
+static const int8_t fibonacci[16]   = { -34,  -21, -13,  -8, -5, -3, -2, -1, 0, 1, 2, 3, 5, 8,  13, 21 };
+static const int8_t exponential[16] = { -128, -64, -32, -16, -8, -4, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64 };
+
+#define MAX_FRAME_SIZE 2048
+
+/**
+ * Interleave samples in buffer containing all left channel samples
+ * at the beginning, and right channel samples at the end.
+ * Each sample is assumed to be in signed 8-bit format.
+ * Right samples are read from src + size/2; size should be even.
+ * @param size the size in bytes of the dst and src buffer
+ */
+static void interleave_stereo(uint8_t *dst, const uint8_t *src, int size)
+{
+    uint8_t *dst_end = dst + size;
+    size = size>>1;
+
+    while (dst < dst_end) {
+        *dst++ = *src;
+        *dst++ = *(src+size);
+        src++;
+    }
+}
+
+/**
+ * Delta decode the compressed values in src, and put the resulting
+ * decoded samples in dst (two per source byte: low nibble, then high
+ * nibble), saturating each sample to the int8_t range [-128, 127].
+ * @param val starting value assumed by the delta sequence
+ * @param table delta sequence table, indexed by a 4-bit code
+ * @return size in bytes of the decoded data, i.e. src_size*2
+ */
+static int delta_decode(int8_t *dst, const uint8_t *src, int src_size,
+                        int8_t val, const int8_t *table)
+{
+    int n = src_size;
+    int8_t *dst0 = dst;
+
+    while (n--) {
+        uint8_t d = *src++;
+        val = av_clip(val + table[d & 0x0f], -128, 127);
+        *dst++ = val;
+        val = av_clip(val + table[d >> 4]  , -128, 127);
+        *dst++ = val;
+    }
+
+    return dst-dst0;
+}
 
 static int eightsvx_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                                  AVPacket *avpkt)
 {
-    const uint8_t *buf = avpkt->data;
-    int buf_size = avpkt->size;
     EightSvxContext *esc = avctx->priv_data;
-    int16_t *out_data = data;
-    int consumed = buf_size;
-    const uint8_t *buf_end = buf + buf_size;
+    int out_data_size, n;
+    uint8_t *src, *dst;
 
-    if((*data_size >> 2) < buf_size)
-        return -1;
+    /* decode and interleave the first packet */
+    if (!esc->samples && avpkt) {
+        uint8_t *deinterleaved_samples;
 
-    if(avctx->frame_number == 0) {
-        esc->fib_acc = buf[1] << 8;
-        buf_size -= 2;
-        buf += 2;
+        esc->samples_size = avctx->codec->id == CODEC_ID_8SVX_RAW ?
+            avpkt->size : avctx->channels + (avpkt->size-avctx->channels) * 2;
+        if (!(esc->samples = av_malloc(esc->samples_size)))
+            return AVERROR(ENOMEM);
+
+        /* decompress */
+        if (avctx->codec->id == CODEC_ID_8SVX_FIB || avctx->codec->id == CODEC_ID_8SVX_EXP) {
+            const uint8_t *buf = avpkt->data;
+            int buf_size = avpkt->size;
+            int n = esc->samples_size;
+
+            if (!(deinterleaved_samples = av_mallocz(n)))
+                return AVERROR(ENOMEM);
+
+            /* the uncompressed starting value is contained in the first byte */
+            if (avctx->channels == 2) {
+                delta_decode(deinterleaved_samples      , buf+1, buf_size/2-1, buf[0], esc->table);
+                buf += buf_size/2;
+                delta_decode(deinterleaved_samples+n/2-1, buf+1, buf_size/2-1, buf[0], esc->table);
+            } else
+                delta_decode(deinterleaved_samples      , buf+1, buf_size-1  , buf[0], esc->table);
+        } else {
+            deinterleaved_samples = avpkt->data;
+        }
+
+        if (avctx->channels == 2)
+            interleave_stereo(esc->samples, deinterleaved_samples, esc->samples_size);
+        else
+            memcpy(esc->samples, deinterleaved_samples, esc->samples_size);
     }
 
-    *data_size = buf_size << 2;
-
-    while(buf < buf_end) {
-        uint8_t d = *buf++;
-        esc->fib_acc += esc->table[d & 0x0f];
-        *out_data++ = esc->fib_acc;
-        esc->fib_acc += esc->table[d >> 4];
-        *out_data++ = esc->fib_acc;
+    /* return single packet with fixed size */
+    out_data_size = FFMIN(MAX_FRAME_SIZE, esc->samples_size - esc->samples_idx);
+    if (*data_size < out_data_size) {
+        av_log(avctx, AV_LOG_ERROR, "Provided buffer with size %d is too small.\n", *data_size);
+        return AVERROR(EINVAL);
     }
 
-    return consumed;
+    *data_size = out_data_size;
+    dst = data;
+    src = esc->samples + esc->samples_idx;
+    for (n = out_data_size; n > 0; n--)
+        *dst++ = *src++ + 128;
+    esc->samples_idx += *data_size;
+
+    return avctx->codec->id == CODEC_ID_8SVX_FIB || avctx->codec->id == CODEC_ID_8SVX_EXP ?
+        (avctx->frame_number == 0)*2 + out_data_size / 2 :
+        out_data_size;
 }
 
 static av_cold int eightsvx_decode_init(AVCodecContext *avctx)
 {
     EightSvxContext *esc = avctx->priv_data;
 
-    switch(avctx->codec->id) {
-        case CODEC_ID_8SVX_FIB:
-          esc->table = fibonacci;
-          break;
-        case CODEC_ID_8SVX_EXP:
-          esc->table = exponential;
-          break;
-        default:
-          return -1;
+    if (avctx->channels > 2) {
+        av_log(avctx, AV_LOG_ERROR, "8SVX does not support more than 2 channels\n");
+        return AVERROR_INVALIDDATA;
     }
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+
+    switch (avctx->codec->id) {
+    case CODEC_ID_8SVX_FIB: esc->table = fibonacci;    break;
+    case CODEC_ID_8SVX_EXP: esc->table = exponential;  break;
+    case CODEC_ID_8SVX_RAW: esc->table = NULL;         break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Invalid codec id %d.\n", avctx->codec->id);
+        return AVERROR_INVALIDDATA;
+    }
+    avctx->sample_fmt = AV_SAMPLE_FMT_U8;
+
+    return 0;
+}
+
+static av_cold int eightsvx_decode_close(AVCodecContext *avctx)
+{
+    EightSvxContext *esc = avctx->priv_data;
+
+    av_freep(&esc->samples);
+    esc->samples_size = 0;
+    esc->samples_idx = 0;
+
     return 0;
 }
 
@@ -104,6 +198,7 @@ AVCodec ff_eightsvx_fib_decoder = {
   .priv_data_size = sizeof (EightSvxContext),
   .init           = eightsvx_decode_init,
   .decode         = eightsvx_decode_frame,
+  .close          = eightsvx_decode_close,
   .long_name      = NULL_IF_CONFIG_SMALL("8SVX fibonacci"),
 };
 
@@ -114,5 +209,17 @@ AVCodec ff_eightsvx_exp_decoder = {
   .priv_data_size = sizeof (EightSvxContext),
   .init           = eightsvx_decode_init,
   .decode         = eightsvx_decode_frame,
+  .close          = eightsvx_decode_close,
   .long_name      = NULL_IF_CONFIG_SMALL("8SVX exponential"),
 };
+
+AVCodec ff_eightsvx_raw_decoder = {
+  .name           = "8svx_raw",
+  .type           = AVMEDIA_TYPE_AUDIO,
+  .id             = CODEC_ID_8SVX_RAW,
+  .priv_data_size = sizeof(EightSvxContext),
+  .init           = eightsvx_decode_init,
+  .decode         = eightsvx_decode_frame,
+  .close          = eightsvx_decode_close,
+  .long_name      = NULL_IF_CONFIG_SMALL("8SVX rawaudio"),
+};
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index e293438e45..ac16e06df8 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -136,6 +136,7 @@ OBJS-$(CONFIG_EATQI_DECODER)           += eatqi.o eaidct.o mpeg12.o \
 OBJS-$(CONFIG_EIGHTBPS_DECODER)        += 8bps.o
 OBJS-$(CONFIG_EIGHTSVX_EXP_DECODER)    += 8svx.o
 OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER)    += 8svx.o
+OBJS-$(CONFIG_EIGHTSVX_RAW_DECODER)    += 8svx.o
 OBJS-$(CONFIG_ESCAPE124_DECODER)       += escape124.o
 OBJS-$(CONFIG_FFV1_DECODER)            += ffv1.o rangecoder.o
 OBJS-$(CONFIG_FFV1_ENCODER)            += ffv1.o rangecoder.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index fc74eeaf8c..ff032dda85 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -104,6 +104,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER (EIGHTBPS, eightbps);
     REGISTER_DECODER (EIGHTSVX_EXP, eightsvx_exp);
     REGISTER_DECODER (EIGHTSVX_FIB, eightsvx_fib);
+    REGISTER_DECODER (EIGHTSVX_RAW, eightsvx_raw);
     REGISTER_DECODER (ESCAPE124, escape124);
     REGISTER_ENCDEC  (FFV1, ffv1);
     REGISTER_ENCDEC  (FFVHUFF, ffvhuff);
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 2fbf9cfc2a..d1a5e6655e 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -204,6 +204,7 @@ enum CodecID {
     CODEC_ID_PRORES,
     CODEC_ID_JV,
     CODEC_ID_DFA,
+    CODEC_ID_8SVX_RAW,
 
     /* various PCM "codecs" */
     CODEC_ID_PCM_S16LE= 0x10000,
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 067cf4af89..471e3aaa9a 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -21,7 +21,7 @@
 #define AVCODEC_VERSION_H
 
 #define LIBAVCODEC_VERSION_MAJOR 53
-#define LIBAVCODEC_VERSION_MINOR  5
+#define LIBAVCODEC_VERSION_MINOR  6
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
diff --git a/libavformat/iff.c b/libavformat/iff.c
index b092de0b08..f6edcdda2e 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -60,8 +60,6 @@
 #define RIGHT   4
 #define STEREO  6
 
-#define PACKET_SIZE 1024
-
 /**
  * This number of bytes if added at the beginning of each AVPacket
  * which contain additional information about video properties
@@ -97,19 +95,6 @@ typedef struct {
     unsigned  masking;      ///< masking method used
 } IffDemuxContext;
 
-
-static void interleave_stereo(const uint8_t *src, uint8_t *dest, int size)
-{
-    uint8_t *end = dest + size;
-    size = size>>1;
-
-    while(dest < end) {
-        *dest++ = *src;
-        *dest++ = *(src+size);
-        src++;
-    }
-}
-
 /* Metadata string read */
 static int get_metadata(AVFormatContext *s,
                         const char *const tag,
@@ -255,7 +240,7 @@ static int iff_read_header(AVFormatContext *s,
 
         switch (iff->svx8_compression) {
         case COMP_NONE:
-            st->codec->codec_id = CODEC_ID_PCM_S8;
+            st->codec->codec_id = CODEC_ID_8SVX_RAW;
             break;
         case COMP_FIB:
             st->codec->codec_id = CODEC_ID_8SVX_FIB;
@@ -330,15 +315,8 @@ static int iff_read_packet(AVFormatContext *s,
     if(iff->sent_bytes >= iff->body_size)
         return AVERROR(EIO);
 
-    if(st->codec->channels == 2) {
-        uint8_t sample_buffer[PACKET_SIZE];
-
-        ret = avio_read(pb, sample_buffer, PACKET_SIZE);
-        if(av_new_packet(pkt, PACKET_SIZE) < 0) {
-            av_log(s, AV_LOG_ERROR, "cannot allocate packet\n");
-            return AVERROR(ENOMEM);
-        }
-        interleave_stereo(sample_buffer, pkt->data, PACKET_SIZE);
+    if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
+        ret = av_get_packet(pb, pkt, iff->body_size);
     } else if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
         uint8_t *buf;
 
@@ -349,23 +327,13 @@ static int iff_read_packet(AVFormatContext *s,
         buf = pkt->data;
         bytestream_put_be16(&buf, 2);
         ret = avio_read(pb, buf, iff->body_size);
-    } else {
-        ret = av_get_packet(pb, pkt, PACKET_SIZE);
     }
 
     if(iff->sent_bytes == 0)
         pkt->flags |= AV_PKT_FLAG_KEY;
+    iff->sent_bytes = iff->body_size;
 
-    if(st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
-        iff->sent_bytes += PACKET_SIZE;
-    } else {
-        iff->sent_bytes = iff->body_size;
-    }
     pkt->stream_index = 0;
-    if(st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
-        pkt->pts = iff->audio_frame_count;
-        iff->audio_frame_count += ret / st->codec->channels;
-    }
     return ret;
 }
 
diff --git a/tests/ref/fate/iff-fibonacci b/tests/ref/fate/iff-fibonacci
index e452f31e6c..947f78e964 100644
--- a/tests/ref/fate/iff-fibonacci
+++ b/tests/ref/fate/iff-fibonacci
@@ -1 +1 @@
-e968a853779bb6438339e3b8d69d8d24
+e76b025238a6a27968f8644f4ccc3207

From d2549ba9df1a1aac8c0ae19bfca2c81e508ba02e Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Tue, 17 May 2011 01:17:27 +0200
Subject: [PATCH 043/830] Fix some mov files with little endian audio (tickets
 201 - 203).

---
 libavformat/mov.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 745768f3b9..a41b8ce73d 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -718,7 +718,7 @@ static int mov_read_enda(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     st = c->fc->streams[c->fc->nb_streams-1];
 
-    little_endian = avio_rb16(pb);
+    little_endian = avio_rb16(pb) & 0xFF;
     av_dlog(c->fc, "enda %d\n", little_endian);
     if (little_endian == 1) {
         switch (st->codec->codec_id) {

From 6d721f714ee61d6e244b980113aa24d5afcbfee0 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Tue, 17 May 2011 01:47:12 +0200
Subject: [PATCH 044/830] Fix ff_mov_write_chan() so it can be used by other
 muxers.

---
 libavformat/cafenc.c |  6 +++++-
 libavformat/isom.c   | 10 +---------
 libavformat/isom.h   |  3 +--
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/libavformat/cafenc.c b/libavformat/cafenc.c
index 0f33c6b592..13636f30ee 100644
--- a/libavformat/cafenc.c
+++ b/libavformat/cafenc.c
@@ -134,7 +134,11 @@ static int caf_write_header(AVFormatContext *s)
     avio_wb32(pb, enc->channels);                     //< mChannelsPerFrame
     avio_wb32(pb, enc->bits_per_coded_sample);        //< mBitsPerChannel
 
-    ff_mov_write_chan(s, enc->channel_layout, "chan");
+    if (enc->channel_layout) {
+        ffio_wfourcc(pb, "chan");
+        avio_wb64(pb, 12);
+        ff_mov_write_chan(pb, enc->channel_layout);
+    }
 
     ffio_wfourcc(pb, "data"); //< Audio Data chunk
     caf->data = avio_tell(pb);
diff --git a/libavformat/isom.c b/libavformat/isom.c
index 3259128d3a..45ccdd2864 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c
@@ -27,7 +27,6 @@
 #include "internal.h"
 #include "isom.h"
 #include "riff.h"
-#include "avio_internal.h"
 #include "libavcodec/mpeg4audio.h"
 #include "libavcodec/mpegaudiodata.h"
 
@@ -484,24 +483,17 @@ void ff_mov_read_chan(AVFormatContext *s, int64_t size, AVCodecContext *codec)
     avio_skip(pb, 8);
 }
 
-void ff_mov_write_chan(AVFormatContext *s, int64_t channel_layout,
-                       const char *chunk_type)
+void ff_mov_write_chan(AVIOContext *pb, int64_t channel_layout)
 {
-    AVIOContext *pb = s->pb;
     const MovChannelLayout *layouts;
     uint32_t layout_tag = 0;
 
-    if (!channel_layout)
-        return;
-
     for (layouts = mov_channel_layout; layouts->channel_layout; layouts++)
         if (channel_layout == layouts->channel_layout) {
             layout_tag = layouts->layout_tag;
             break;
         }
 
-    ffio_wfourcc(pb, chunk_type);
-    avio_wb64(pb, 12);             //< mChunkSize
     if (layout_tag) {
         avio_wb32(pb, layout_tag); //< mChannelLayoutTag
         avio_wb32(pb, 0);          //< mChannelBitmap
diff --git a/libavformat/isom.h b/libavformat/isom.h
index 6649d85739..2b64486129 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -157,7 +157,6 @@ enum CodecID ff_mov_get_lpcm_codec_id(int bps, int flags);
 
 int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries);
 void ff_mov_read_chan(AVFormatContext *s, int64_t size, AVCodecContext *codec);
-void ff_mov_write_chan(AVFormatContext *s, int64_t channel_layout,
-                       const char *chunk_type);
+void ff_mov_write_chan(AVIOContext *pb, int64_t channel_layout);
 
 #endif /* AVFORMAT_ISOM_H */

From f2962ac8ad353c12d81bc515a01427f0912b4a69 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Tue, 17 May 2011 01:56:01 +0200
Subject: [PATCH 045/830] Write channel_layout for multichannel aif files.

---
 libavformat/Makefile  | 2 +-
 libavformat/aiffenc.c | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavformat/Makefile b/libavformat/Makefile
index 26c094cbfc..13fe2371bf 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -24,7 +24,7 @@ OBJS-$(CONFIG_AC3_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_ADTS_MUXER)                += adtsenc.o
 OBJS-$(CONFIG_AEA_DEMUXER)               += aea.o pcm.o
 OBJS-$(CONFIG_AIFF_DEMUXER)              += aiffdec.o riff.o pcm.o isom.o
-OBJS-$(CONFIG_AIFF_MUXER)                += aiffenc.o riff.o
+OBJS-$(CONFIG_AIFF_MUXER)                += aiffenc.o riff.o isom.o
 OBJS-$(CONFIG_AMR_DEMUXER)               += amr.o
 OBJS-$(CONFIG_AMR_MUXER)                 += amr.o
 OBJS-$(CONFIG_ANM_DEMUXER)               += anm.o
diff --git a/libavformat/aiffenc.c b/libavformat/aiffenc.c
index cc3cbc4762..34ab0cc246 100644
--- a/libavformat/aiffenc.c
+++ b/libavformat/aiffenc.c
@@ -22,6 +22,7 @@
 #include "avformat.h"
 #include "aiff.h"
 #include "avio_internal.h"
+#include "isom.h"
 
 typedef struct {
     int64_t form;
@@ -61,6 +62,12 @@ static int aiff_write_header(AVFormatContext *s)
         avio_wb32(pb, 0xA2805140);
     }
 
+    if (enc->channels > 2 && enc->channel_layout) {
+        ffio_wfourcc(pb, "CHAN");
+        avio_wb32(pb, 12);
+        ff_mov_write_chan(pb, enc->channel_layout);
+    }
+
     /* Common chunk */
     ffio_wfourcc(pb, "COMM");
     avio_wb32(pb, aifc ? 24 : 18); /* size */

From e6e7ba0ce3aadef32f7f16f706c4a0406b5bd70f Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 17 May 2011 00:46:48 +0200
Subject: [PATCH 046/830] Add some forgotten const to function arguments in
 libavfilter & libavformat.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavfilter/gradfun.h     | 14 +++++++-------
 libavfilter/vf_gradfun.c  |  6 +++---
 libavfilter/vf_unsharp.c  |  4 ++--
 libavfilter/x86/gradfun.c |  6 +++---
 libavformat/mmst.c        |  2 +-
 libavformat/mov.c         |  4 ++--
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavfilter/gradfun.h b/libavfilter/gradfun.h
index 3dacbcb252..3c01085b83 100644
--- a/libavfilter/gradfun.h
+++ b/libavfilter/gradfun.h
@@ -33,16 +33,16 @@ typedef struct {
     int chroma_r;  ///< blur radius for the chroma planes
     uint16_t *buf; ///< holds image data for blur algorithm passed into filter.
     /// DSP functions.
-    void (*filter_line) (uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers);
-    void (*blur_line) (uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width);
+    void (*filter_line) (uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers);
+    void (*blur_line) (uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width);
 } GradFunContext;
 
-void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers);
-void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width);
+void ff_gradfun_filter_line_c(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers);
+void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width);
 
-void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers);
-void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers);
+void ff_gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers);
+void ff_gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers);
 
-void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width);
+void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width);
 
 #endif /* AVFILTER_GRADFUN_H */
diff --git a/libavfilter/vf_gradfun.c b/libavfilter/vf_gradfun.c
index 83ed3d79e4..32dd3c1072 100644
--- a/libavfilter/vf_gradfun.c
+++ b/libavfilter/vf_gradfun.c
@@ -49,7 +49,7 @@ DECLARE_ALIGNED(16, static const uint16_t, dither)[8][8] = {
     {0x54,0x34,0x4C,0x2C,0x52,0x32,0x4A,0x2A},
 };
 
-void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers)
+void ff_gradfun_filter_line_c(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
 {
     int x;
     for (x = 0; x < width; x++, dc += x & 1) {
@@ -63,7 +63,7 @@ void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int widt
     }
 }
 
-void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width)
+void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width)
 {
     int x, v, old;
     for (x = 0; x < width; x++) {
@@ -74,7 +74,7 @@ void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t
     }
 }
 
-static void filter(GradFunContext *ctx, uint8_t *dst, uint8_t *src, int width, int height, int dst_linesize, int src_linesize, int r)
+static void filter(GradFunContext *ctx, uint8_t *dst, const uint8_t *src, int width, int height, int dst_linesize, int src_linesize, int r)
 {
     int bstride = FFALIGN(width, 16) / 2;
     int y;
diff --git a/libavfilter/vf_unsharp.c b/libavfilter/vf_unsharp.c
index 7aa7a43651..fa75de5d94 100644
--- a/libavfilter/vf_unsharp.c
+++ b/libavfilter/vf_unsharp.c
@@ -63,7 +63,7 @@ typedef struct {
     FilterParam chroma; ///< chroma parameters (width, height, amount)
 } UnsharpContext;
 
-static void unsharpen(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, FilterParam *fp)
+static void unsharpen(uint8_t *dst, const uint8_t *src, int dst_stride, int src_stride, int width, int height, FilterParam *fp)
 {
     uint32_t **sc = fp->sc;
     uint32_t sr[(MAX_SIZE * MAX_SIZE) - 1], tmp1, tmp2;
@@ -96,7 +96,7 @@ static void unsharpen(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride
                 tmp1 = sc[z + 1][x + fp->steps_x] + tmp2; sc[z + 1][x + fp->steps_x] = tmp2;
             }
             if (x >= fp->steps_x && y >= fp->steps_y) {
-                uint8_t* srx = src - fp->steps_y * src_stride + x - fp->steps_x;
+                const uint8_t* srx = src - fp->steps_y * src_stride + x - fp->steps_x;
                 uint8_t* dsx = dst - fp->steps_y * dst_stride + x - fp->steps_x;
 
                 res = (int32_t)*srx + ((((int32_t) * srx - (int32_t)((tmp1 + fp->halfscale) >> fp->scalebits)) * fp->amount) >> 16);
diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c
index 894a44b9ff..05d4a6fd6e 100644
--- a/libavfilter/x86/gradfun.c
+++ b/libavfilter/x86/gradfun.c
@@ -23,7 +23,7 @@
 DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
 DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
 
-void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers)
+void ff_gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
 {
 #if HAVE_MMX
     intptr_t x;
@@ -71,7 +71,7 @@ void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int w
 #endif
 }
 
-void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers)
+void ff_gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
 {
 #if HAVE_SSSE3
     intptr_t x;
@@ -118,7 +118,7 @@ void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int
 #endif // HAVE_SSSE3
 }
 
-void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width)
+void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width)
 {
 #if HAVE_SSE
 #define BLURV(load)\
diff --git a/libavformat/mmst.c b/libavformat/mmst.c
index 7b0af6c320..a3db288b35 100644
--- a/libavformat/mmst.c
+++ b/libavformat/mmst.c
@@ -152,7 +152,7 @@ static int send_command_packet(MMSTContext *mmst)
     return 0;
 }
 
-static void mms_put_utf16(MMSContext *mms, uint8_t *src)
+static void mms_put_utf16(MMSContext *mms, const uint8_t *src)
 {
     AVIOContext bic;
     int size = mms->write_out_ptr - mms->out_buffer;
diff --git a/libavformat/mov.c b/libavformat/mov.c
index a41b8ce73d..26e7a3a2df 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1689,13 +1689,13 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
     }
 }
 
-static int mov_open_dref(AVIOContext **pb, char *src, MOVDref *ref)
+static int mov_open_dref(AVIOContext **pb, const char *src, MOVDref *ref)
 {
     /* try relative path, we do not try the absolute because it can leak information about our
        system to an attacker */
     if (ref->nlvl_to > 0 && ref->nlvl_from > 0) {
         char filename[1024];
-        char *src_path;
+        const char *src_path;
         int i, l;
 
         /* find a source dir */

From cb8b824a0899a086ed22eef84b0eedbcab6788c9 Mon Sep 17 00:00:00 2001
From: Compn <tempn@twmi.rr.com>
Date: Mon, 16 May 2011 23:30:41 -0400
Subject: [PATCH 047/830] update changelog with 9/10 bit H264 and FFV1 changes

---
 Changelog | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Changelog b/Changelog
index 78320b0331..702f53f795 100644
--- a/Changelog
+++ b/Changelog
@@ -15,6 +15,8 @@ version <next>:
 - DPX image encoder
 - SMPTE 302M AES3 audio decoder
 - Apple Core Audio Format muxer
+- 9bit and 10bit H.264 decoding
+- 9bit and 10bit FFV1 encoding / decoding
 
 
 version 0.7_beta1:

From bec994dff22c5f3f0a5c7a96b333b3faab81c02d Mon Sep 17 00:00:00 2001
From: Nicolas George <nicolas.george@normalesup.org>
Date: Fri, 15 Apr 2011 17:08:12 +0200
Subject: [PATCH 048/830] Ogg demuxer: give meaningful error codes and
 warnings.

Signed-off-by: Nicolas George <nicolas.george@normalesup.org>
---
 libavformat/oggdec.c | 46 ++++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index c799ce5205..7f6536545b 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -193,7 +193,7 @@ static int ogg_read_page(AVFormatContext *s, int *str)
     AVIOContext *bc = s->pb;
     struct ogg *ogg = s->priv_data;
     struct ogg_stream *os;
-    int i = 0;
+    int ret, i = 0;
     int flags, nsegs;
     uint64_t gp;
     uint32_t serial;
@@ -203,8 +203,9 @@ static int ogg_read_page(AVFormatContext *s, int *str)
     uint8_t sync[4];
     int sp = 0;
 
-    if (avio_read (bc, sync, 4) < 4)
-        return -1;
+    ret = avio_read (bc, sync, 4);
+    if (ret < 4)
+        return ret < 0 ? ret : AVERROR_EOF;
 
     do{
         int c;
@@ -216,17 +217,17 @@ static int ogg_read_page(AVFormatContext *s, int *str)
 
         c = avio_r8(bc);
         if (url_feof(bc))
-            return -1;
+            return AVERROR_EOF;
         sync[sp++ & 3] = c;
     }while (i++ < MAX_PAGE_SIZE);
 
     if (i >= MAX_PAGE_SIZE){
         av_log (s, AV_LOG_INFO, "ogg, can't find sync word\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     if (avio_r8(bc) != 0)      /* version */
-        return -1;
+        return AVERROR_INVALIDDATA;
 
     flags = avio_r8(bc);
     gp = avio_rl64 (bc);
@@ -251,7 +252,7 @@ static int ogg_read_page(AVFormatContext *s, int *str)
             idx = ogg_new_stream(s, serial, 1);
         }
         if (idx < 0)
-            return -1;
+            return idx;
     }
 
     os = ogg->streams + idx;
@@ -260,8 +261,9 @@ static int ogg_read_page(AVFormatContext *s, int *str)
     if(os->psize > 0)
         ogg_new_buf(ogg, idx);
 
-    if (avio_read (bc, os->segments, nsegs) < nsegs)
-        return -1;
+    ret = avio_read (bc, os->segments, nsegs);
+    if (ret < nsegs)
+        return ret < 0 ? ret : AVERROR_EOF;
 
     os->nsegs = nsegs;
     os->segp = 0;
@@ -292,8 +294,9 @@ static int ogg_read_page(AVFormatContext *s, int *str)
         os->buf = nb;
     }
 
-    if (avio_read (bc, os->buf + os->bufpos, size) < size)
-        return -1;
+    ret = avio_read (bc, os->buf + os->bufpos, size);
+    if (ret < size)
+        return ret < 0 ? ret : AVERROR_EOF;
 
     os->bufpos += size;
     os->granule = gp;
@@ -309,7 +312,7 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
                       int64_t *fpos)
 {
     struct ogg *ogg = s->priv_data;
-    int idx, i;
+    int idx, i, ret;
     struct ogg_stream *os;
     int complete = 0;
     int segp = 0, psize = 0;
@@ -322,8 +325,9 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
         idx = ogg->curidx;
 
         while (idx < 0){
-            if (ogg_read_page (s, &idx) < 0)
-                return -1;
+            ret = ogg_read_page (s, &idx);
+            if (ret < 0)
+                return ret;
         }
 
         os = ogg->streams + idx;
@@ -338,6 +342,7 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
             if (os->header < 0){
                 os->codec = ogg_find_codec (os->buf, os->bufpos);
                 if (!os->codec){
+                    av_log(s, AV_LOG_WARNING, "Codec not found\n");
                     os->header = 0;
                     return 0;
                 }
@@ -439,10 +444,12 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
 static int ogg_get_headers(AVFormatContext *s)
 {
     struct ogg *ogg = s->priv_data;
+    int ret;
 
     do{
-        if (ogg_packet (s, NULL, NULL, NULL, NULL) < 0)
-            return -1;
+        ret = ogg_packet (s, NULL, NULL, NULL, NULL);
+        if (ret < 0)
+            return ret;
     }while (!ogg->headers);
 
 #if 0
@@ -503,11 +510,12 @@ static int ogg_get_length(AVFormatContext *s)
 static int ogg_read_header(AVFormatContext *s, AVFormatParameters *ap)
 {
     struct ogg *ogg = s->priv_data;
-    int i;
+    int ret, i;
     ogg->curidx = -1;
     //linear headers seek from start
-    if (ogg_get_headers (s) < 0){
-        return -1;
+    ret = ogg_get_headers (s);
+    if (ret < 0){
+        return ret;
     }
 
     for (i = 0; i < ogg->nstreams; i++)

From 1362a291c971ba2c46c08f0533265b294030de27 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 17 May 2011 13:42:43 +0100
Subject: [PATCH 049/830] qdm2: include correct header for rdft

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/qdm2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 198f11f271..76ecb79621 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -38,7 +38,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "fft.h"
+#include "rdft.h"
 #include "mpegaudio.h"
 
 #include "qdm2data.h"

From 6f2309ed2e0c5c02ab417f3f0acad23e86411079 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 17 May 2011 11:48:28 +0100
Subject: [PATCH 050/830] dct: build dct32 as separate object files

This builds the float and fixed-point versions of dct32 separately
instead of #including the file in dct.c and mpegaudiodec.c.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/Makefile       |  2 +-
 libavcodec/dct.c          |  6 ++----
 libavcodec/dct32.c        | 13 +++++++++++--
 libavcodec/dct32.h        | 25 +++++++++++++++++++++++++
 libavcodec/dct32_fixed.c  | 20 ++++++++++++++++++++
 libavcodec/dct32_float.c  | 20 ++++++++++++++++++++
 libavcodec/mpegaudiodec.c |  9 ++-------
 7 files changed, 81 insertions(+), 14 deletions(-)
 create mode 100644 libavcodec/dct32.h
 create mode 100644 libavcodec/dct32_fixed.c
 create mode 100644 libavcodec/dct32_float.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index aaf9ceb4f9..fa70216c9c 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -27,7 +27,7 @@ OBJS = allcodecs.o                                                      \
 OBJS-$(CONFIG_AANDCT)                  += aandcttab.o
 OBJS-$(CONFIG_AC3DSP)                  += ac3dsp.o
 OBJS-$(CONFIG_ENCODERS)                += faandct.o jfdctfst.o jfdctint.o
-OBJS-$(CONFIG_DCT)                     += dct.o
+OBJS-$(CONFIG_DCT)                     += dct.o dct32_fixed.o dct32_float.o
 OBJS-$(CONFIG_DWT)                     += dwt.o
 OBJS-$(CONFIG_DXVA2)                   += dxva2.o
 FFT-OBJS-$(CONFIG_HARDCODED_TABLES)    += cos_tables.o cos_fixed_tables.o
diff --git a/libavcodec/dct.c b/libavcodec/dct.c
index caa6bdb4b4..5c63af30a1 100644
--- a/libavcodec/dct.c
+++ b/libavcodec/dct.c
@@ -30,9 +30,7 @@
 #include <math.h>
 #include "libavutil/mathematics.h"
 #include "dct.h"
-
-#define DCT32_FLOAT
-#include "dct32.c"
+#include "dct32.h"
 
 /* sin((M_PI * x / (2*n)) */
 #define SIN(s,n,x) (s->costab[(n) - (x)])
@@ -210,7 +208,7 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
         }
     }
 
-    s->dct32 = dct32;
+    s->dct32 = ff_dct32_float;
     if (HAVE_MMX)     ff_dct_init_mmx(s);
 
     return 0;
diff --git a/libavcodec/dct32.c b/libavcodec/dct32.c
index ae99d88996..272e0dbf95 100644
--- a/libavcodec/dct32.c
+++ b/libavcodec/dct32.c
@@ -19,10 +19,19 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifdef DCT32_FLOAT
+#include "dct32.h"
+#include "mathops.h"
+
+#if DCT32_FLOAT
+#   define dct32 ff_dct32_float
 #   define FIXHR(x)       ((float)(x))
 #   define MULH3(x, y, s) ((s)*(y)*(x))
 #   define INTFLOAT float
+#else
+#   define dct32 ff_dct32_fixed
+#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
+#   define MULH3(x, y, s) MULH((s)*(x), y)
+#   define INTFLOAT int
 #endif
 
 
@@ -103,7 +112,7 @@
 #define ADD(a, b) val##a += val##b
 
 /* DCT32 without 1/sqrt(2) coef zero scaling. */
-static void dct32(INTFLOAT *out, const INTFLOAT *tab)
+void dct32(INTFLOAT *out, const INTFLOAT *tab)
 {
     INTFLOAT tmp0, tmp1;
 
diff --git a/libavcodec/dct32.h b/libavcodec/dct32.h
new file mode 100644
index 0000000000..110338d25c
--- /dev/null
+++ b/libavcodec/dct32.h
@@ -0,0 +1,25 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCT32_H
+#define AVCODEC_DCT32_H
+
+void ff_dct32_float(float *dst, const float *src);
+void ff_dct32_fixed(int *dst, const int *src);
+
+#endif
diff --git a/libavcodec/dct32_fixed.c b/libavcodec/dct32_fixed.c
new file mode 100644
index 0000000000..7eb9dc1a53
--- /dev/null
+++ b/libavcodec/dct32_fixed.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DCT32_FLOAT 0
+#include "dct32.c"
diff --git a/libavcodec/dct32_float.c b/libavcodec/dct32_float.c
new file mode 100644
index 0000000000..727ec3caca
--- /dev/null
+++ b/libavcodec/dct32_float.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DCT32_FLOAT 1
+#include "dct32.c"
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 8c42e09666..35e217ea3e 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -29,6 +29,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "mathops.h"
+#include "dct32.h"
 
 /*
  * TODO:
@@ -68,12 +69,6 @@
 #include "mpegaudiodata.h"
 #include "mpegaudiodectab.h"
 
-#if CONFIG_FLOAT
-#    include "fft.h"
-#else
-#    include "dct32.c"
-#endif
-
 static void compute_antialias(MPADecodeContext *s, GranuleDef *g);
 static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
                                int *dither_state, OUT_INT *samples, int incr);
@@ -637,7 +632,7 @@ void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
     offset = *synth_buf_offset;
     synth_buf = synth_buf_ptr + offset;
 
-    dct32(synth_buf, sb_samples);
+    ff_dct32_fixed(synth_buf, sb_samples);
     apply_window_mp3_c(synth_buf, window, dither_state, samples, incr);
 
     offset = (offset - 32) & 511;

From a4b6000b00850f74fabc06f673da49331d5fdf5d Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 17 May 2011 16:48:39 +0200
Subject: [PATCH 051/830] ffmpeg: reset top_field_first in opt_input_file().

This allows setting tff for inputs without also setting it for the output
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffmpeg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ffmpeg.c b/ffmpeg.c
index 6ab6b9971a..ca44c82b86 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -3456,6 +3456,7 @@ static void opt_input_file(const char *filename)
     nb_input_files++;
 
     video_channel = 0;
+    top_field_first = -1;
 
     av_freep(&video_codec_name);
     av_freep(&audio_codec_name);

From d0005d347d0831c904630fe70408c9fd4eec18e8 Mon Sep 17 00:00:00 2001
From: Daniel Kang <daniel.d.kang@gmail.com>
Date: Mon, 16 May 2011 17:18:22 -0400
Subject: [PATCH 052/830] Modify x86util.asm to ease transitioning to 10-bit
 H.264 assembly.

Arguments for variable size instructions are added to many macros, along
with other various changes. The x86util.asm code was ported from x264.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/x86/dsputilenc_yasm.asm |  12 +-
 libavcodec/x86/h264_idct.asm       |  26 ++--
 libavcodec/x86/vp8dsp.asm          |  10 +-
 libavcodec/x86/x86util.asm         | 228 ++++++++++++++++++-----------
 4 files changed, 163 insertions(+), 113 deletions(-)

diff --git a/libavcodec/x86/dsputilenc_yasm.asm b/libavcodec/x86/dsputilenc_yasm.asm
index fdd80e8fcb..6063ff1040 100644
--- a/libavcodec/x86/dsputilenc_yasm.asm
+++ b/libavcodec/x86/dsputilenc_yasm.asm
@@ -59,12 +59,12 @@ SECTION .text
 %endmacro
 
 %macro HADAMARD8 0
-    SUMSUB_BADC       m0, m1, m2, m3
-    SUMSUB_BADC       m4, m5, m6, m7
-    SUMSUB_BADC       m0, m2, m1, m3
-    SUMSUB_BADC       m4, m6, m5, m7
-    SUMSUB_BADC       m0, m4, m1, m5
-    SUMSUB_BADC       m2, m6, m3, m7
+    SUMSUB_BADC       w, 0, 1, 2, 3
+    SUMSUB_BADC       w, 4, 5, 6, 7
+    SUMSUB_BADC       w, 0, 2, 1, 3
+    SUMSUB_BADC       w, 4, 6, 5, 7
+    SUMSUB_BADC       w, 0, 4, 1, 5
+    SUMSUB_BADC       w, 2, 6, 3, 7
 %endmacro
 
 %macro ABS1_SUM 3
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index d237b3eefd..ae70a3049b 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -59,11 +59,11 @@ SECTION .text
     movq         m2, [%2+16]
     movq         m3, [%2+24]
 
-    IDCT4_1D      0, 1, 2, 3, 4, 5
+    IDCT4_1D      w, 0, 1, 2, 3, 4, 5
     mova         m6, [pw_32]
     TRANSPOSE4x4W 0, 1, 2, 3, 4
     paddw        m0, m6
-    IDCT4_1D      0, 1, 2, 3, 4, 5
+    IDCT4_1D      w, 0, 1, 2, 3, 4, 5
     pxor         m7, m7
 
     STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3
@@ -118,13 +118,13 @@ cglobal h264_idct_add_mmx, 3, 3, 0
 
     mova         m2, %1
     mova         m5, %2
-    SUMSUB_BA    m5, m2
-    SUMSUB_BA    m6, m5
-    SUMSUB_BA    m4, m2
-    SUMSUB_BA    m7, m6
-    SUMSUB_BA    m0, m4
-    SUMSUB_BA    m3, m2
-    SUMSUB_BA    m1, m5
+    SUMSUB_BA    w, 5, 2
+    SUMSUB_BA    w, 6, 5
+    SUMSUB_BA    w, 4, 2
+    SUMSUB_BA    w, 7, 6
+    SUMSUB_BA    w, 0, 4
+    SUMSUB_BA    w, 3, 2
+    SUMSUB_BA    w, 1, 5
     SWAP          7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
 %endmacro
 
@@ -715,10 +715,10 @@ x264_add8x4_idct_sse2:
     movhps m1, [r2+40]
     movhps m2, [r2+48]
     movhps m3, [r2+56]
-    IDCT4_1D 0,1,2,3,4,5
+    IDCT4_1D w,0,1,2,3,4,5
     TRANSPOSE2x4x4W 0,1,2,3,4
     paddw m0, [pw_32]
-    IDCT4_1D 0,1,2,3,4,5
+    IDCT4_1D w,0,1,2,3,4,5
     pxor  m7, m7
     STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3
     lea   r0, [r0+r3*2]
@@ -859,8 +859,8 @@ cglobal h264_idct_add8_sse2, 5, 7, 8
 ;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul)
 
 %macro WALSH4_1D 5
-    SUMSUB_BADC m%4, m%3, m%2, m%1, m%5
-    SUMSUB_BADC m%4, m%2, m%3, m%1, m%5
+    SUMSUB_BADC w, %4, %3, %2, %1, %5
+    SUMSUB_BADC w, %4, %2, %3, %1, %5
     SWAP %1, %4, %3
 %endmacro
 
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index e974f12642..7d9ebc9463 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -1106,10 +1106,10 @@ cglobal vp8_idct_dc_add4uv_mmx, 3, 3
 ;           %5/%6 are temporary registers
 ;           we assume m6/m7 have constant words 20091/17734 loaded in them
 %macro VP8_IDCT_TRANSFORM4x4_1D 6
-    SUMSUB_BA           m%3, m%1, m%5     ;t0, t1
+    SUMSUB_BA         w, %3,  %1,  %5     ;t0, t1
     VP8_MULTIPLY_SUMSUB m%2, m%4, m%5,m%6 ;t2, t3
-    SUMSUB_BA           m%4, m%3, m%5     ;tmp0, tmp3
-    SUMSUB_BA           m%2, m%1, m%5     ;tmp1, tmp2
+    SUMSUB_BA         w, %4,  %3,  %5     ;tmp0, tmp3
+    SUMSUB_BA         w, %2,  %1,  %5     ;tmp1, tmp2
     SWAP                 %4,  %1
     SWAP                 %4,  %3
 %endmacro
@@ -1181,8 +1181,8 @@ VP8_IDCT_ADD sse
 %endmacro
 
 %macro HADAMARD4_1D 4
-    SUMSUB_BADC m%2, m%1, m%4, m%3
-    SUMSUB_BADC m%4, m%2, m%3, m%1
+    SUMSUB_BADC w, %2, %1, %4, %3
+    SUMSUB_BADC w, %4, %2, %3, %1
     SWAP %1, %4, %3
 %endmacro
 
diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm
index 8c3fc87912..7bd985a33b 100644
--- a/libavcodec/x86/x86util.asm
+++ b/libavcodec/x86/x86util.asm
@@ -208,6 +208,17 @@
     pminub  %2, %4
 %endmacro
 
+%macro ABSD2_MMX 4
+    pxor    %3, %3
+    pxor    %4, %4
+    pcmpgtd %3, %1
+    pcmpgtd %4, %2
+    pxor    %1, %3
+    pxor    %2, %4
+    psubd   %1, %3
+    psubd   %2, %4
+%endmacro
+
 %macro ABSB_SSSE3 2
     pabsb   %1, %1
 %endmacro
@@ -230,12 +241,7 @@
 %macro SPLATB_MMX 3
     movd      %1, [%2-3] ;to avoid crossing a cacheline
     punpcklbw %1, %1
-%if mmsize==16
-    pshuflw   %1, %1, 0xff
-    punpcklqdq %1, %1
-%else
-    pshufw    %1, %1, 0xff
-%endif
+    SPLATW    %1, %1, 3
 %endmacro
 
 %macro SPLATB_SSSE3 3
@@ -243,125 +249,169 @@
     pshufb    %1, %3
 %endmacro
 
-%macro PALIGNR_MMX 4
-    %ifnidn %4, %2
+%macro PALIGNR_MMX 4-5 ; [dst,] src1, src2, imm, tmp
+    %define %%dst %1
+%if %0==5
+%ifnidn %1, %2
+    mova    %%dst, %2
+%endif
+    %rotate 1
+%endif
+%ifnidn %4, %2
     mova    %4, %2
-    %endif
-    %if mmsize == 8
-    psllq   %1, (8-%3)*8
+%endif
+%if mmsize==8
+    psllq   %%dst, (8-%3)*8
     psrlq   %4, %3*8
-    %else
-    pslldq  %1, 16-%3
+%else
+    pslldq  %%dst, 16-%3
     psrldq  %4, %3
-    %endif
-    por     %1, %4
+%endif
+    por     %%dst, %4
 %endmacro
 
-%macro PALIGNR_SSSE3 4
+%macro PALIGNR_SSSE3 4-5
+%if %0==5
+    palignr %1, %2, %3, %4
+%else
     palignr %1, %2, %3
+%endif
 %endmacro
 
 %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
 %ifnum %5
-    mova   m%1, m%5
-    mova   m%3, m%5
+    pand   m%3, m%5, m%4 ; src .. y6 .. y4
+    pand   m%1, m%5, m%2 ; dst .. y6 .. y4
 %else
     mova   m%1, %5
-    mova   m%3, m%1
+    pand   m%3, m%1, m%4 ; src .. y6 .. y4
+    pand   m%1, m%1, m%2 ; dst .. y6 .. y4
 %endif
-    pand   m%1, m%2 ; dst .. y6 .. y4
-    pand   m%3, m%4 ; src .. y6 .. y4
-    psrlw  m%2, 8   ; dst .. y7 .. y5
-    psrlw  m%4, 8   ; src .. y7 .. y5
+    psrlw  m%2, 8        ; dst .. y7 .. y5
+    psrlw  m%4, 8        ; src .. y7 .. y5
 %endmacro
 
-%macro SUMSUB_BA 2-3
-%if %0==2
-    paddw   %1, %2
-    paddw   %2, %2
-    psubw   %2, %1
+%macro SUMSUB_BA 3-4
+%if %0==3
+    padd%1  m%2, m%3
+    padd%1  m%3, m%3
+    psub%1  m%3, m%2
 %else
-    mova    %3, %1
-    paddw   %1, %2
-    psubw   %2, %3
-%endif
-%endmacro
-
-%macro SUMSUB_BADC 4-5
-%if %0==5
-    SUMSUB_BA %1, %2, %5
-    SUMSUB_BA %3, %4, %5
+%if avx_enabled == 0
+    mova    m%4, m%2
+    padd%1  m%2, m%3
+    psub%1  m%3, m%4
 %else
-    paddw   %1, %2
-    paddw   %3, %4
-    paddw   %2, %2
-    paddw   %4, %4
-    psubw   %2, %1
-    psubw   %4, %3
+    padd%1  m%4, m%2, m%3
+    psub%1  m%3, m%2
+    SWAP    %2, %4
+%endif
 %endif
 %endmacro
 
-%macro SUMSUB2_AB 3
-    mova    %3, %1
-    paddw   %1, %1
-    paddw   %1, %2
-    psubw   %3, %2
-    psubw   %3, %2
+%macro SUMSUB_BADC 5-6
+%if %0==6
+    SUMSUB_BA %1, %2, %3, %6
+    SUMSUB_BA %1, %4, %5, %6
+%else
+    padd%1  m%2, m%3
+    padd%1  m%4, m%5
+    padd%1  m%3, m%3
+    padd%1  m%5, m%5
+    psub%1  m%3, m%2
+    psub%1  m%5, m%4
+%endif
 %endmacro
 
-%macro SUMSUB2_BA 3
-    mova    m%3, m%1
-    paddw   m%1, m%2
-    paddw   m%1, m%2
-    psubw   m%2, m%3
-    psubw   m%2, m%3
+%macro SUMSUB2_AB 4
+%ifnum %3
+    psub%1  m%4, m%2, m%3
+    psub%1  m%4, m%3
+    padd%1  m%2, m%2
+    padd%1  m%2, m%3
+%else
+    mova    m%4, m%2
+    padd%1  m%2, m%2
+    padd%1  m%2, %3
+    psub%1  m%4, %3
+    psub%1  m%4, %3
+%endif
 %endmacro
 
-%macro SUMSUBD2_AB 4
-    mova    %4, %1
-    mova    %3, %2
-    psraw   %2, 1  ; %2: %2>>1
-    psraw   %1, 1  ; %1: %1>>1
-    paddw   %2, %4 ; %2: %2>>1+%1
-    psubw   %1, %3 ; %1: %1>>1-%2
+%macro SUMSUB2_BA 4
+%if avx_enabled == 0
+    mova    m%4, m%2
+    padd%1  m%2, m%3
+    padd%1  m%2, m%3
+    psub%1  m%3, m%4
+    psub%1  m%3, m%4
+%else
+    padd%1  m%4, m%2, m%3
+    padd%1  m%4, m%3
+    psub%1  m%3, m%2
+    psub%1  m%3, m%2
+    SWAP     %2,  %4
+%endif
+%endmacro
+
+%macro SUMSUBD2_AB 5
+%ifnum %4
+    psra%1  m%5, m%2, 1  ; %3: %3>>1
+    psra%1  m%4, m%3, 1  ; %2: %2>>1
+    padd%1  m%4, m%2     ; %3: %3>>1+%2
+    psub%1  m%5, m%3     ; %2: %2>>1-%3
+    SWAP     %2, %5
+    SWAP     %3, %4
+%else
+    mova    %5, m%2
+    mova    %4, m%3
+    psra%1  m%3, 1  ; %3: %3>>1
+    psra%1  m%2, 1  ; %2: %2>>1
+    padd%1  m%3, %5 ; %3: %3>>1+%2
+    psub%1  m%2, %4 ; %2: %2>>1-%3
+%endif
 %endmacro
 
 %macro DCT4_1D 5
 %ifnum %5
-    SUMSUB_BADC m%4, m%1, m%3, m%2; m%5
-    SUMSUB_BA   m%3, m%4, m%5
-    SUMSUB2_AB  m%1, m%2, m%5
+    SUMSUB_BADC w, %4, %1, %3, %2, %5
+    SUMSUB_BA   w, %3, %4, %5
+    SUMSUB2_AB  w, %1, %2, %5
     SWAP %1, %3, %4, %5, %2
 %else
-    SUMSUB_BADC m%4, m%1, m%3, m%2
-    SUMSUB_BA   m%3, m%4
-    mova       [%5], m%2
-    SUMSUB2_AB m%1, [%5], m%2
+    SUMSUB_BADC w, %4, %1, %3, %2
+    SUMSUB_BA   w, %3, %4
+    mova     [%5], m%2
+    SUMSUB2_AB  w, %1, [%5], %2
     SWAP %1, %3, %4, %2
 %endif
 %endmacro
 
-%macro IDCT4_1D 5-6
-%ifnum %5
-    SUMSUBD2_AB m%2, m%4, m%6, m%5
-    ; %2: %2>>1-%4 %4: %2+%4>>1
-    SUMSUB_BA   m%3, m%1, m%6
-    ; %3: %1+%3 %1: %1-%3
-    SUMSUB_BADC m%4, m%3, m%2, m%1, m%6
-    ; %4: %1+%3 + (%2+%4>>1)
-    ; %3: %1+%3 - (%2+%4>>1)
-    ; %2: %1-%3 + (%2>>1-%4)
-    ; %1: %1-%3 - (%2>>1-%4)
+%macro IDCT4_1D 6-7
+%ifnum %6
+    SUMSUBD2_AB %1, %3, %5, %7, %6
+    ; %3: %3>>1-%5 %5: %3+%5>>1
+    SUMSUB_BA   %1, %4, %2, %7
+    ; %4: %2+%4 %2: %2-%4
+    SUMSUB_BADC %1, %5, %4, %3, %2, %7
+    ; %5: %2+%4 + (%3+%5>>1)
+    ; %4: %2+%4 - (%3+%5>>1)
+    ; %3: %2-%4 + (%3>>1-%5)
+    ; %2: %2-%4 - (%3>>1-%5)
 %else
-    SUMSUBD2_AB m%2, m%4, [%5], [%5+16]
-    SUMSUB_BA   m%3, m%1
-    SUMSUB_BADC m%4, m%3, m%2, m%1
+%ifidn %1, w
+    SUMSUBD2_AB %1, %3, %5, [%6], [%6+16]
+%else
+    SUMSUBD2_AB %1, %3, %5, [%6], [%6+32]
 %endif
-    SWAP %1, %4, %3
-    ; %1: %1+%3 + (%2+%4>>1) row0
-    ; %2: %1-%3 + (%2>>1-%4) row1
-    ; %3: %1-%3 - (%2>>1-%4) row2
-    ; %4: %1+%3 - (%2+%4>>1) row3
+    SUMSUB_BA   %1, %4, %2
+    SUMSUB_BADC %1, %5, %4, %3, %2
+%endif
+    SWAP %2, %5, %4
+    ; %2: %2+%4 + (%3+%5>>1) row0
+    ; %3: %2-%4 + (%3>>1-%5) row1
+    ; %4: %2-%4 - (%3>>1-%5) row2
+    ; %5: %2+%4 - (%3+%5>>1) row3
 %endmacro
 
 

From 4bac1bbc3bc6e102cd1e8bfd0a36db07d769dfea Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 17 May 2011 13:07:08 +0100
Subject: [PATCH 053/830] mpegaudio: add _fixed suffix to some names

This adds a _fixed suffix to the fixed-point versions of things
with both float and fixed-point variants.  This makes it more
consistent with other dual-implementation things, e.g. fft.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpc.c                | 6 +++---
 libavcodec/mpegaudio.h          | 6 +++---
 libavcodec/mpegaudio_tablegen.c | 4 ++--
 libavcodec/mpegaudio_tablegen.h | 8 ++++----
 libavcodec/mpegaudiodec.c       | 4 ++--
 libavcodec/qdm2.c               | 6 +++---
 6 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/libavcodec/mpc.c b/libavcodec/mpc.c
index 36e0f04539..15febefe0b 100644
--- a/libavcodec/mpc.c
+++ b/libavcodec/mpc.c
@@ -36,7 +36,7 @@
 
 void ff_mpc_init(void)
 {
-    ff_mpa_synth_init(ff_mpa_synth_window);
+    ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed);
 }
 
 /**
@@ -51,8 +51,8 @@ static void mpc_synth(MPCContext *c, int16_t *out, int channels)
     for(ch = 0;  ch < channels; ch++){
         samples_ptr = samples + ch;
         for(i = 0; i < SAMPLES_PER_BAND; i++) {
-            ff_mpa_synth_filter(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
-                                ff_mpa_synth_window, &dither_state,
+            ff_mpa_synth_filter_fixed(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
+                                ff_mpa_synth_window_fixed, &dither_state,
                                 samples_ptr, channels,
                                 c->sb_samples[ch][i]);
             samples_ptr += 32 * channels;
diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 2c3f2ec065..3422b6df68 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -158,9 +158,9 @@ typedef struct HuffTable {
 
 int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
 int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
-extern MPA_INT ff_mpa_synth_window[];
-void ff_mpa_synth_init(MPA_INT *window);
-void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
+extern MPA_INT ff_mpa_synth_window_fixed[];
+void ff_mpa_synth_init_fixed(MPA_INT *window);
+void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
                          MPA_INT *window, int *dither_state,
                          OUT_INT *samples, int incr,
                          INTFLOAT sb_samples[SBLIMIT]);
diff --git a/libavcodec/mpegaudio_tablegen.c b/libavcodec/mpegaudio_tablegen.c
index 27da2191d9..b4c240bd7c 100644
--- a/libavcodec/mpegaudio_tablegen.c
+++ b/libavcodec/mpegaudio_tablegen.c
@@ -33,9 +33,9 @@ int main(void)
 
     WRITE_ARRAY("static const", int8_t, table_4_3_exp);
     WRITE_ARRAY("static const", uint32_t, table_4_3_value);
-    WRITE_ARRAY("static const", uint32_t, exp_table);
+    WRITE_ARRAY("static const", uint32_t, exp_table_fixed);
     WRITE_ARRAY("static const", float, exp_table_float);
-    WRITE_2D_ARRAY("static const", uint32_t, expval_table);
+    WRITE_2D_ARRAY("static const", uint32_t, expval_table_fixed);
     WRITE_2D_ARRAY("static const", float, expval_table_float);
 
     return 0;
diff --git a/libavcodec/mpegaudio_tablegen.h b/libavcodec/mpegaudio_tablegen.h
index 6bde277916..2264b739d2 100644
--- a/libavcodec/mpegaudio_tablegen.h
+++ b/libavcodec/mpegaudio_tablegen.h
@@ -33,8 +33,8 @@
 #else
 static int8_t   table_4_3_exp[TABLE_4_3_SIZE];
 static uint32_t table_4_3_value[TABLE_4_3_SIZE];
-static uint32_t exp_table[512];
-static uint32_t expval_table[512][16];
+static uint32_t exp_table_fixed[512];
+static uint32_t expval_table_fixed[512][16];
 static float exp_table_float[512];
 static float expval_table_float[512][16];
 
@@ -59,10 +59,10 @@ static void mpegaudio_tableinit(void)
     for (exponent = 0; exponent < 512; exponent++) {
         for (value = 0; value < 16; value++) {
             double f = (double)value * cbrtf(value) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5);
-            expval_table[exponent][value] = llrint(f);
+            expval_table_fixed[exponent][value] = llrint(f);
             expval_table_float[exponent][value] = f;
         }
-        exp_table[exponent] = expval_table[exponent][1];
+        exp_table_fixed[exponent] = expval_table_fixed[exponent][1];
         exp_table_float[exponent] = expval_table_float[exponent][1];
     }
 }
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 35e217ea3e..77ecb44c9e 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -58,7 +58,7 @@
 #   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
 #   define MULH3(x, y, s) MULH((s)*(x), y)
 #   define MULLx(x, y, s) MULL(x,y,s)
-#   define RENAME(a)      a
+#   define RENAME(a)      a ## _fixed
 #   define OUT_FMT AV_SAMPLE_FMT_S16
 #endif
 
@@ -621,7 +621,7 @@ static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
    32 samples. */
 /* XXX: optimize by avoiding ring buffer usage */
 #if !CONFIG_FLOAT
-void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
+void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
                          MPA_INT *window, int *dither_state,
                          OUT_INT *samples, int incr,
                          INTFLOAT sb_samples[SBLIMIT])
diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 76ecb79621..0f4dd18966 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -1616,8 +1616,8 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
         OUT_INT *samples_ptr = samples + ch;
 
         for (i = 0; i < 8; i++) {
-            ff_mpa_synth_filter(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
-                ff_mpa_synth_window, &dither_state,
+            ff_mpa_synth_filter_fixed(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
+                ff_mpa_synth_window_fixed, &dither_state,
                 samples_ptr, q->nb_channels,
                 q->sb_samples[ch][(8 * index) + i]);
             samples_ptr += 32 * q->nb_channels;
@@ -1646,7 +1646,7 @@ static av_cold void qdm2_init(QDM2Context *q) {
     initialized = 1;
 
     qdm2_init_vlc();
-    ff_mpa_synth_init(ff_mpa_synth_window);
+    ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed);
     softclip_table_init();
     rnd_table_init();
     init_noise_samples();

From c7bd5edae4573d901583475608865c6f6ca64061 Mon Sep 17 00:00:00 2001
From: Vladimir Pantelic <vladoman@gmail.com>
Date: Thu, 12 May 2011 10:25:54 +0200
Subject: [PATCH 054/830] asfdec: fallback to binary search internally

lavf will do that anyway in case seek by index fails
---
 libavformat/asfdec.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index 77c84490a0..e2161fda2a 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -1269,21 +1269,22 @@ static int asf_read_seek(AVFormatContext *s, int stream_index, int64_t pts, int
     if (!asf->index_read)
         asf_build_simple_index(s, stream_index);
 
-    if(!(asf->index_read && st->index_entries)){
-        if(av_seek_frame_binary(s, stream_index, pts, flags)<0)
-            return -1;
-    }else{
+    if((asf->index_read && st->index_entries)){
         index= av_index_search_timestamp(st, pts, flags);
-        if(index<0)
-            return -1;
-
+        if(index >= 0) {
         /* find the position */
         pos = st->index_entries[index].pos;
 
         /* do the seek */
         av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos);
         avio_seek(s->pb, pos, SEEK_SET);
+        asf_reset_header(s);
+        return 0;
+        }
     }
+    /* no index or seeking by index failed */
+    if(av_seek_frame_binary(s, stream_index, pts, flags)<0)
+        return -1;
     asf_reset_header(s);
     return 0;
 }

From b58bc17cf72fcd79b6ed80faae2d0c88729def15 Mon Sep 17 00:00:00 2001
From: Vladimir Pantelic <vladoman@gmail.com>
Date: Thu, 12 May 2011 10:26:32 +0200
Subject: [PATCH 055/830] asfdec: reindent after previous commit c7bd5ed

---
 libavformat/asfdec.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index e2161fda2a..f0c746ace2 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -1272,14 +1272,14 @@ static int asf_read_seek(AVFormatContext *s, int stream_index, int64_t pts, int
     if((asf->index_read && st->index_entries)){
         index= av_index_search_timestamp(st, pts, flags);
         if(index >= 0) {
-        /* find the position */
-        pos = st->index_entries[index].pos;
+            /* find the position */
+            pos = st->index_entries[index].pos;
 
-        /* do the seek */
-        av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos);
-        avio_seek(s->pb, pos, SEEK_SET);
-        asf_reset_header(s);
-        return 0;
+            /* do the seek */
+            av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos);
+            avio_seek(s->pb, pos, SEEK_SET);
+            asf_reset_header(s);
+            return 0;
         }
     }
     /* no index or seeking by index failed */

From 69fa23961ededd725c68b188493cf2653d70f4fd Mon Sep 17 00:00:00 2001
From: Vladimir Pantelic <vladoman@gmail.com>
Date: Tue, 17 May 2011 17:30:05 +0200
Subject: [PATCH 056/830] asfdec: do not fall back to binary/generic search

asf_read_seek() inside the asf demuxer already does the
right thing, it tries the index and if that fails it uses
binary search. If binary search is called from outside of asfdec.c
it will fail because the asf code cannot clean up after itself.
Therefore introduce AVFMT_NOBINSEARCH, which prevents the seek
code from falling back to binary search, and AVFMT_NOGENSEARCH,
which prevents the seek code from falling back to generic search.
---
 libavformat/asfdec.c   | 1 +
 libavformat/avformat.h | 2 ++
 libavformat/utils.c    | 6 ++++--
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index f0c746ace2..e9a3995705 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -1299,4 +1299,5 @@ AVInputFormat ff_asf_demuxer = {
     asf_read_close,
     asf_read_seek,
     asf_read_pts,
+    .flags = AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH,
 };
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 732756222e..aca246d95a 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -258,6 +258,8 @@ typedef struct AVFormatParameters {
 #define AVFMT_VARIABLE_FPS  0x0400 /**< Format allows variable fps. */
 #define AVFMT_NODIMENSIONS  0x0800 /**< Format does not need width/height */
 #define AVFMT_NOSTREAMS     0x1000 /**< Format does not require any streams */
+#define AVFMT_NOBINSEARCH   0x2000 /**< Format does not allow to fallback to binary search via read_timestamp */
+#define AVFMT_NOGENSEARCH   0x4000 /**< Format does not allow to fallback to generic search */
 
 typedef struct AVOutputFormat {
     const char *name;
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 67aa76ad75..ad226016aa 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1713,10 +1713,12 @@ int av_seek_frame(AVFormatContext *s, int stream_index, int64_t timestamp, int f
         return 0;
     }
 
-    if(s->iformat->read_timestamp)
+    if(s->iformat->read_timestamp && !(s->iformat->flags & AVFMT_NOBINSEARCH))
         return av_seek_frame_binary(s, stream_index, timestamp, flags);
-    else
+    else if (!(s->iformat->flags & AVFMT_NOGENSEARCH))
         return av_seek_frame_generic(s, stream_index, timestamp, flags);
+    else
+        return -1;
 }
 
 int avformat_seek_file(AVFormatContext *s, int stream_index, int64_t min_ts, int64_t ts, int64_t max_ts, int flags)

From 29fa570d0c74c59a4a970f5ade9fbd126314cbd9 Mon Sep 17 00:00:00 2001
From: Uoti Urpala <uoti.urpala@pp1.inet.fi>
Date: Thu, 12 May 2011 10:20:27 -0400
Subject: [PATCH 057/830] asfdec: fix possible overread on broken files.

---
 libavformat/asfdec.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index e9a3995705..ed02d40fb9 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -852,7 +852,10 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb){
     }
     if (asf->packet_flags & 0x01) {
         DO_2BITS(asf->packet_segsizetype >> 6, asf->packet_frag_size, 0); // 0 is illegal
-        if(asf->packet_frag_size > asf->packet_size_left - rsize){
+        if (rsize > asf->packet_size_left) {
+            av_log(s, AV_LOG_ERROR, "packet_replic_size is invalid\n");
+            return -1;
+        } else if(asf->packet_frag_size > asf->packet_size_left - rsize){
             if (asf->packet_frag_size > asf->packet_size_left - rsize + asf->packet_padsize) {
                 av_log(s, AV_LOG_ERROR, "packet_frag_size is invalid (%d-%d)\n", asf->packet_size_left, rsize);
                 return -1;

From 13220b1856c98d83ad8ac237e789927cce0b9413 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Tue, 17 May 2011 12:49:42 -0700
Subject: [PATCH 058/830] flvdec: clean up debug code

---
 libavformat/flvdec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
index e7ec0b107f..6fdbf9b464 100644
--- a/libavformat/flvdec.c
+++ b/libavformat/flvdec.c
@@ -387,7 +387,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
     size = avio_rb24(s->pb);
     dts = avio_rb24(s->pb);
     dts |= avio_r8(s->pb) << 24;
-//    av_log(s, AV_LOG_DEBUG, "type:%d, size:%d, dts:%d\n", type, size, dts);
+    av_dlog(s, "type:%d, size:%d, dts:%"PRId64"\n", type, size, dts);
     if (s->pb->eof_reached)
         return AVERROR_EOF;
     avio_skip(s->pb, 3); /* stream id, always 0 */
@@ -433,7 +433,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
         st= create_stream(s, is_audio);
         s->ctx_flags &= ~AVFMTCTX_NOHEADER;
     }
-//    av_log(s, AV_LOG_DEBUG, "%d %X %d \n", is_audio, flags, st->discard);
+    av_dlog(s, "%d %X %d \n", is_audio, flags, st->discard);
     if(  (st->discard >= AVDISCARD_NONKEY && !((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY ||         is_audio))
        ||(st->discard >= AVDISCARD_BIDIR  &&  ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_DISP_INTER && !is_audio))
        || st->discard >= AVDISCARD_ALL

From cc1ca9e534d540aa698d14763aee2933ca3dca59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <tomas.hardin@codemill.se>
Date: Tue, 17 May 2011 19:46:08 +0200
Subject: [PATCH 059/830] Refactor the 'fmt ' tag search and parsing

Moving the search and parsing of the 'fmt ' info into the main loop of wav_read_header() allows tags that precede it to be parsed.
Creating wav_parse_fmt_tag() makes wav_read_header() easier to read.
---
 libavformat/wav.c | 57 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 18 deletions(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index 588aff512b..1832bc9660 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -184,6 +184,26 @@ static int wav_probe(AVProbeData *p)
     return 0;
 }
 
+/** Create a new stream in *st and parse a 'fmt ' chunk of the given size
+ *  into its codec context; returns 0, or a negative error code on failure. */
+static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st)
+{
+    AVIOContext *pb = s->pb;
+    int ret;
+
+    *st = av_new_stream(s, 0);
+    if (!*st)
+        return AVERROR(ENOMEM);
+
+    /* propagate header parsing failures to the caller */
+    ret = ff_get_wav_header(pb, (*st)->codec, size);
+    if (ret < 0)
+        return ret;
+    (*st)->need_parsing = AVSTREAM_PARSE_FULL;
+    av_set_pts_info(*st, 64, 1, (*st)->codec->sample_rate);
+    return 0;
+}
+
 /* wav input */
 static int wav_read_header(AVFormatContext *s,
                            AVFormatParameters *ap)
@@ -195,7 +215,8 @@ static int wav_read_header(AVFormatContext *s,
     AVIOContext *pb = s->pb;
     AVStream *st;
     WAVContext *wav = s->priv_data;
-    int ret;
+    int ret, got_fmt = 0;
+    int64_t next_tag_ofs;
 
     /* check RIFF header */
     tag = avio_rl32(pb);
@@ -220,32 +241,32 @@ static int wav_read_header(AVFormatContext *s,
         avio_skip(pb, size - 16); /* skip rest of ds64 chunk */
     }
 
-    /* parse fmt header */
-    size = find_tag(pb, MKTAG('f', 'm', 't', ' '));
-    if (size < 0)
-        return -1;
-    st = av_new_stream(s, 0);
-    if (!st)
-        return AVERROR(ENOMEM);
-
-    ret = ff_get_wav_header(pb, st->codec, size);
-    if (ret < 0)
-        return ret;
-    st->need_parsing = AVSTREAM_PARSE_FULL;
-
-    av_set_pts_info(st, 64, 1, st->codec->sample_rate);
 
     for (;;) {
         if (url_feof(pb))
             return -1;
         size = next_tag(pb, &tag);
-        if (tag == MKTAG('d', 'a', 't', 'a')){
+        next_tag_ofs = url_ftell(pb) + size;
+
+        if (tag == MKTAG('f', 'm', 't', ' ')) {
+            /* only parse the first 'fmt ' tag found; fail with its error code */
+            if (!got_fmt && (ret = wav_parse_fmt_tag(s, size, &st)) < 0) {
+                return ret;
+            } else if (got_fmt)
+                av_log(s, AV_LOG_WARNING, "found more than one 'fmt ' tag\n");
+
+            got_fmt = 1;
+        } else if (tag == MKTAG('d', 'a', 't', 'a')) {
+            if (!got_fmt) {
+                av_log(s, AV_LOG_ERROR, "found no 'fmt ' tag before the 'data' tag\n");
+                return AVERROR_INVALIDDATA;
+            }
+
             break;
         }else if (tag == MKTAG('f','a','c','t') && !sample_count){
             sample_count = avio_rl32(pb);
-            size -= 4;
         }
-        avio_skip(pb, size);
+        avio_seek(pb, next_tag_ofs, SEEK_SET);
     }
     if (rf64)
         size = data_size;

From 3d922c84622e7bf8603390b154630c3d62b93b12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <tomas.hardin@codemill.se>
Date: Tue, 17 May 2011 19:52:36 +0200
Subject: [PATCH 060/830] Make sure neither data_size nor sample_count is
 negative

---
 libavformat/wav.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index 1832bc9660..6b1e574a6e 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -238,6 +238,12 @@ static int wav_read_header(AVFormatContext *s,
         avio_rl64(pb); /* RIFF size */
         data_size = avio_rl64(pb);
         sample_count = avio_rl64(pb);
+        if (data_size < 0 || sample_count < 0) {
+            av_log(s, AV_LOG_ERROR, "negative data_size and/or sample_count in "
+                   "ds64: data_size = %"PRId64", sample_count = %"PRId64"\n",
+                   data_size, sample_count); /* 64-bit values read with avio_rl64() */
+            return AVERROR_INVALIDDATA;
+        }
         avio_skip(pb, size - 16); /* skip rest of ds64 chunk */
     }
 

From f0029cbcf68d77f4ea0ea0cc36596ea2a5305b13 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 29 Apr 2011 11:30:02 +0200
Subject: [PATCH 061/830] lavf: use designated initializers for AVClasses.

---
 libavformat/avio.c      | 8 ++++++--
 libavformat/crypto.c    | 5 ++++-
 libavformat/http.c      | 5 ++++-
 libavformat/mp3enc.c    | 8 ++++----
 libavformat/mpegtsenc.c | 8 ++++----
 libavformat/options.c   | 7 ++++++-
 libavformat/spdifenc.c  | 7 ++++++-
 7 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/libavformat/avio.c b/libavformat/avio.c
index 8881f269f2..ac15407fda 100644
--- a/libavformat/avio.c
+++ b/libavformat/avio.c
@@ -39,8 +39,12 @@ static const char *urlcontext_to_name(void *ptr)
     else        return "NULL";
 }
 static const AVOption options[] = {{NULL}};
-static const AVClass urlcontext_class =
-        { "URLContext", urlcontext_to_name, options, LIBAVUTIL_VERSION_INT };
+static const AVClass urlcontext_class = {
+    .class_name     = "URLContext",
+    .item_name      = urlcontext_to_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
 /*@}*/
 
 static int default_interrupt_cb(void);
diff --git a/libavformat/crypto.c b/libavformat/crypto.c
index fecc2c961c..789a4d1e76 100644
--- a/libavformat/crypto.c
+++ b/libavformat/crypto.c
@@ -52,7 +52,10 @@ static const AVOption options[] = {
 };
 
 static const AVClass crypto_class = {
-    "crypto", av_default_item_name, options, LIBAVUTIL_VERSION_INT
+    .class_name     = "crypto",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
 };
 
 static int crypto_open(URLContext *h, const char *uri, int flags)
diff --git a/libavformat/http.c b/libavformat/http.c
index ff8f2405eb..aa8c6657db 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -58,7 +58,10 @@ static const AVOption options[] = {
 {NULL}
 };
 static const AVClass httpcontext_class = {
-    "HTTP", av_default_item_name, options, LIBAVUTIL_VERSION_INT
+    .class_name     = "HTTP",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
 };
 
 static int http_connect(URLContext *h, const char *path, const char *hoststr,
diff --git a/libavformat/mp3enc.c b/libavformat/mp3enc.c
index 2337837ef1..00ed6f8d4c 100644
--- a/libavformat/mp3enc.c
+++ b/libavformat/mp3enc.c
@@ -167,10 +167,10 @@ static const AVOption options[] = {
 };
 
 static const AVClass mp3_muxer_class = {
-    "MP3 muxer",
-    av_default_item_name,
-    options,
-    LIBAVUTIL_VERSION_INT,
+    .class_name     = "MP3 muxer",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
 };
 
 static int id3v2_check_write_tag(AVFormatContext *s, AVMetadataTag *t, const char table[][4],
diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index 8fa8a56c90..2aa9698651 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c
@@ -89,10 +89,10 @@ static const AVOption options[] = {
 };
 
 static const AVClass mpegts_muxer_class = {
-    "MPEGTS muxer",
-    av_default_item_name,
-    options,
-    LIBAVUTIL_VERSION_INT,
+    .class_name     = "MPEGTS muxer",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
 };
 
 /* NOTE: 4 bytes must be left at the end for the crc32 */
diff --git a/libavformat/options.c b/libavformat/options.c
index bdf4796e49..483b644f8d 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -64,7 +64,12 @@ static const AVOption options[]={
 #undef D
 #undef DEFAULT
 
-static const AVClass av_format_context_class = { "AVFormatContext", format_to_name, options, LIBAVUTIL_VERSION_INT };
+static const AVClass av_format_context_class = {
+    .class_name     = "AVFormatContext",
+    .item_name      = format_to_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
 
 static void avformat_get_context_defaults(AVFormatContext *s)
 {
diff --git a/libavformat/spdifenc.c b/libavformat/spdifenc.c
index 24c2f15754..735546096f 100644
--- a/libavformat/spdifenc.c
+++ b/libavformat/spdifenc.c
@@ -93,7 +93,12 @@ static const AVOption options[] = {
 { NULL },
 };
 
-static const AVClass class = { "spdif", av_default_item_name, options, LIBAVUTIL_VERSION_INT };
+static const AVClass class = {
+    .class_name     = "spdif",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
 
 static int spdif_header_ac3(AVFormatContext *s, AVPacket *pkt)
 {

From 29e3489602aeb72dbd8ceebfcfa7025e8a57acaf Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 29 Apr 2011 11:42:05 +0200
Subject: [PATCH 062/830] lavf: remove duplicate assignment in
 avformat_alloc_context.

AVClass is already initialized in  avformat_get_context_defaults.
---
 libavformat/options.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavformat/options.c b/libavformat/options.c
index 483b644f8d..22807c3058 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -86,6 +86,5 @@ AVFormatContext *avformat_alloc_context(void)
     ic = av_malloc(sizeof(AVFormatContext));
     if (!ic) return ic;
     avformat_get_context_defaults(ic);
-    ic->av_class = &av_format_context_class;
     return ic;
 }

From e25c67108a77b2dbf13de1339b5314d07e3ffa02 Mon Sep 17 00:00:00 2001
From: Jindrich Makovicka <jindrich.makovicka@nangu.tv>
Date: Tue, 10 May 2011 15:11:45 +0200
Subject: [PATCH 063/830] libx264: handle closed GOP codec flag

Also update libx264 presets to keep closed gop as default.

Signed-off-by: Jindrich Makovicka <makovick@gmail.com>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 configure                                      | 4 ++--
 ffpresets/libx264-fast.ffpreset                | 2 +-
 ffpresets/libx264-fast_firstpass.ffpreset      | 2 +-
 ffpresets/libx264-faster.ffpreset              | 2 +-
 ffpresets/libx264-faster_firstpass.ffpreset    | 2 +-
 ffpresets/libx264-lossless_fast.ffpreset       | 2 +-
 ffpresets/libx264-lossless_max.ffpreset        | 2 +-
 ffpresets/libx264-lossless_medium.ffpreset     | 2 +-
 ffpresets/libx264-lossless_slow.ffpreset       | 2 +-
 ffpresets/libx264-lossless_slower.ffpreset     | 2 +-
 ffpresets/libx264-lossless_ultrafast.ffpreset  | 2 +-
 ffpresets/libx264-medium.ffpreset              | 2 +-
 ffpresets/libx264-medium_firstpass.ffpreset    | 2 +-
 ffpresets/libx264-placebo.ffpreset             | 2 +-
 ffpresets/libx264-placebo_firstpass.ffpreset   | 2 +-
 ffpresets/libx264-slow.ffpreset                | 2 +-
 ffpresets/libx264-slow_firstpass.ffpreset      | 2 +-
 ffpresets/libx264-slower.ffpreset              | 2 +-
 ffpresets/libx264-slower_firstpass.ffpreset    | 2 +-
 ffpresets/libx264-superfast.ffpreset           | 2 +-
 ffpresets/libx264-superfast_firstpass.ffpreset | 2 +-
 ffpresets/libx264-ultrafast.ffpreset           | 2 +-
 ffpresets/libx264-ultrafast_firstpass.ffpreset | 2 +-
 ffpresets/libx264-veryfast.ffpreset            | 2 +-
 ffpresets/libx264-veryfast_firstpass.ffpreset  | 2 +-
 ffpresets/libx264-veryslow.ffpreset            | 2 +-
 ffpresets/libx264-veryslow_firstpass.ffpreset  | 2 +-
 libavcodec/libx264.c                           | 2 ++
 28 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/configure b/configure
index d6a5d69c6c..5b81e0b599 100755
--- a/configure
+++ b/configure
@@ -2891,8 +2891,8 @@ enabled libvpx     && {
     enabled libvpx_encoder && { check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_enc_init_ver -lvpx ||
                                 die "ERROR: libvpx encoder version must be >=0.9.1"; } }
 enabled libx264    && require  libx264 x264.h x264_encoder_encode -lx264 &&
-                      { check_cpp_condition x264.h "X264_BUILD >= 99" ||
-                        die "ERROR: libx264 version must be >= 0.99."; }
+                      { check_cpp_condition x264.h "X264_BUILD >= 115" ||
+                        die "ERROR: libx264 version must be >= 0.115."; }
 enabled libxavs    && require  libxavs xavs.h xavs_encoder_encode -lxavs
 enabled libxvid    && require  libxvid xvid.h xvid_global -lxvidcore
 enabled mlib       && require  mediaLib mlib_types.h mlib_VectorSub_S16_U8_Mod -lmlib
diff --git a/ffpresets/libx264-fast.ffpreset b/ffpresets/libx264-fast.ffpreset
index 0fc1f22403..65201331bd 100644
--- a/ffpresets/libx264-fast.ffpreset
+++ b/ffpresets/libx264-fast.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partb8x8
 me_method=hex
diff --git a/ffpresets/libx264-fast_firstpass.ffpreset b/ffpresets/libx264-fast_firstpass.ffpreset
index cdcbbbf227..6fdb4b9e55 100644
--- a/ffpresets/libx264-fast_firstpass.ffpreset
+++ b/ffpresets/libx264-fast_firstpass.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-faster.ffpreset b/ffpresets/libx264-faster.ffpreset
index 3156cd8028..52efc1a325 100644
--- a/ffpresets/libx264-faster.ffpreset
+++ b/ffpresets/libx264-faster.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partb8x8
 me_method=hex
diff --git a/ffpresets/libx264-faster_firstpass.ffpreset b/ffpresets/libx264-faster_firstpass.ffpreset
index 9bcf18ae9d..41a87fb6b3 100644
--- a/ffpresets/libx264-faster_firstpass.ffpreset
+++ b/ffpresets/libx264-faster_firstpass.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-lossless_fast.ffpreset b/ffpresets/libx264-lossless_fast.ffpreset
index b7696b5bcb..49b9ed1add 100644
--- a/ffpresets/libx264-lossless_fast.ffpreset
+++ b/ffpresets/libx264-lossless_fast.ffpreset
@@ -1,5 +1,5 @@
 coder=0
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8+parti4x4+partp8x8-partp4x4-partb8x8
 me_method=hex
diff --git a/ffpresets/libx264-lossless_max.ffpreset b/ffpresets/libx264-lossless_max.ffpreset
index 75c387f162..f32d7b40c6 100644
--- a/ffpresets/libx264-lossless_max.ffpreset
+++ b/ffpresets/libx264-lossless_max.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
 me_method=esa
diff --git a/ffpresets/libx264-lossless_medium.ffpreset b/ffpresets/libx264-lossless_medium.ffpreset
index 116e3343ce..0b84612fcb 100644
--- a/ffpresets/libx264-lossless_medium.ffpreset
+++ b/ffpresets/libx264-lossless_medium.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
 me_method=hex
diff --git a/ffpresets/libx264-lossless_slow.ffpreset b/ffpresets/libx264-lossless_slow.ffpreset
index 0d496f6e29..857d3d1986 100644
--- a/ffpresets/libx264-lossless_slow.ffpreset
+++ b/ffpresets/libx264-lossless_slow.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
 me_method=umh
diff --git a/ffpresets/libx264-lossless_slower.ffpreset b/ffpresets/libx264-lossless_slower.ffpreset
index 672e0cd637..ef0609f1b6 100644
--- a/ffpresets/libx264-lossless_slower.ffpreset
+++ b/ffpresets/libx264-lossless_slower.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
 me_method=umh
diff --git a/ffpresets/libx264-lossless_ultrafast.ffpreset b/ffpresets/libx264-lossless_ultrafast.ffpreset
index a2eda65edf..4cc84f1b4f 100644
--- a/ffpresets/libx264-lossless_ultrafast.ffpreset
+++ b/ffpresets/libx264-lossless_ultrafast.ffpreset
@@ -1,5 +1,5 @@
 coder=0
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partp4x4-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-medium.ffpreset b/ffpresets/libx264-medium.ffpreset
index 3c90ec5d62..685995226d 100644
--- a/ffpresets/libx264-medium.ffpreset
+++ b/ffpresets/libx264-medium.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partb8x8
 me_method=hex
diff --git a/ffpresets/libx264-medium_firstpass.ffpreset b/ffpresets/libx264-medium_firstpass.ffpreset
index 2ad0a9cc25..ca304ee24d 100644
--- a/ffpresets/libx264-medium_firstpass.ffpreset
+++ b/ffpresets/libx264-medium_firstpass.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-placebo.ffpreset b/ffpresets/libx264-placebo.ffpreset
index 9f4719f71d..7923a76c74 100644
--- a/ffpresets/libx264-placebo.ffpreset
+++ b/ffpresets/libx264-placebo.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4+partb8x8
 me_method=tesa
diff --git a/ffpresets/libx264-placebo_firstpass.ffpreset b/ffpresets/libx264-placebo_firstpass.ffpreset
index 9f4719f71d..7923a76c74 100644
--- a/ffpresets/libx264-placebo_firstpass.ffpreset
+++ b/ffpresets/libx264-placebo_firstpass.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4+partb8x8
 me_method=tesa
diff --git a/ffpresets/libx264-slow.ffpreset b/ffpresets/libx264-slow.ffpreset
index dabe0ae14e..fcbef4bcfc 100644
--- a/ffpresets/libx264-slow.ffpreset
+++ b/ffpresets/libx264-slow.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partb8x8
 me_method=umh
diff --git a/ffpresets/libx264-slow_firstpass.ffpreset b/ffpresets/libx264-slow_firstpass.ffpreset
index 4af64dbe32..74f87b0c2d 100644
--- a/ffpresets/libx264-slow_firstpass.ffpreset
+++ b/ffpresets/libx264-slow_firstpass.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-slower.ffpreset b/ffpresets/libx264-slower.ffpreset
index 239ee68cb4..741d21f920 100644
--- a/ffpresets/libx264-slower.ffpreset
+++ b/ffpresets/libx264-slower.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4+partb8x8
 me_method=umh
diff --git a/ffpresets/libx264-slower_firstpass.ffpreset b/ffpresets/libx264-slower_firstpass.ffpreset
index 4b5b420c29..0be886a156 100644
--- a/ffpresets/libx264-slower_firstpass.ffpreset
+++ b/ffpresets/libx264-slower_firstpass.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-superfast.ffpreset b/ffpresets/libx264-superfast.ffpreset
index fb2ab8c44f..7f0f50b782 100644
--- a/ffpresets/libx264-superfast.ffpreset
+++ b/ffpresets/libx264-superfast.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-superfast_firstpass.ffpreset b/ffpresets/libx264-superfast_firstpass.ffpreset
index 55ff9a2b7f..87b4f29012 100644
--- a/ffpresets/libx264-superfast_firstpass.ffpreset
+++ b/ffpresets/libx264-superfast_firstpass.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-ultrafast.ffpreset b/ffpresets/libx264-ultrafast.ffpreset
index 28dc0eb836..561191e399 100644
--- a/ffpresets/libx264-ultrafast.ffpreset
+++ b/ffpresets/libx264-ultrafast.ffpreset
@@ -1,5 +1,5 @@
 coder=0
-flags=-loop
+flags=-loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-ultrafast_firstpass.ffpreset b/ffpresets/libx264-ultrafast_firstpass.ffpreset
index 28dc0eb836..561191e399 100644
--- a/ffpresets/libx264-ultrafast_firstpass.ffpreset
+++ b/ffpresets/libx264-ultrafast_firstpass.ffpreset
@@ -1,5 +1,5 @@
 coder=0
-flags=-loop
+flags=-loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-veryfast.ffpreset b/ffpresets/libx264-veryfast.ffpreset
index bfa3d8fad2..d8c7f7a371 100644
--- a/ffpresets/libx264-veryfast.ffpreset
+++ b/ffpresets/libx264-veryfast.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partb8x8
 me_method=hex
diff --git a/ffpresets/libx264-veryfast_firstpass.ffpreset b/ffpresets/libx264-veryfast_firstpass.ffpreset
index ac2332df44..7b2a1e93d2 100644
--- a/ffpresets/libx264-veryfast_firstpass.ffpreset
+++ b/ffpresets/libx264-veryfast_firstpass.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/ffpresets/libx264-veryslow.ffpreset b/ffpresets/libx264-veryslow.ffpreset
index e07aeb82fa..82333655f9 100644
--- a/ffpresets/libx264-veryslow.ffpreset
+++ b/ffpresets/libx264-veryslow.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4+partb8x8
 me_method=umh
diff --git a/ffpresets/libx264-veryslow_firstpass.ffpreset b/ffpresets/libx264-veryslow_firstpass.ffpreset
index e17c04b20b..2bbf4731f4 100644
--- a/ffpresets/libx264-veryslow_firstpass.ffpreset
+++ b/ffpresets/libx264-veryslow_firstpass.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partb8x8
 me_method=dia
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index bf5cbc501f..e5fac00469 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -298,6 +298,8 @@ static av_cold int X264_init(AVCodecContext *avctx)
 
     x4->params.b_interlaced   = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
 
+    x4->params.b_open_gop     = !(avctx->flags & CODEC_FLAG_CLOSED_GOP);
+
     x4->params.i_slice_count  = avctx->slices;
 
     x4->params.vui.b_fullrange = avctx->pix_fmt == PIX_FMT_YUVJ420P;

From 9132f2ad0c064b5cebfc0e7ad562ac0cfd67a79e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 18 May 2011 00:01:40 +0200
Subject: [PATCH 064/830] eval: opensolaris strtod() cannot handle 0x1234

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavutil/eval.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavutil/eval.c b/libavutil/eval.c
index 98b4e0ac52..9271fd6cbc 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c
@@ -75,7 +75,10 @@ double av_strtod(const char *numstr, char **tail)
 {
     double d;
     char *next;
-    d = strtod(numstr, &next);
+    if(numstr[0]=='0' && (numstr[1]|0x20)=='x') {
+        d = strtol(numstr, &next, 16);
+    } else
+        d = strtod(numstr, &next);
     /* if parsing succeeded, check for and interpret postfixes */
     if (next!=numstr) {
         if (*next >= 'E' && *next <= 'z') {

From a26d2b4bc8af02b27168c277c5097273c05652c2 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 17 May 2011 18:06:51 -0400
Subject: [PATCH 065/830] Fix compilation of iirfilter-test.

---
 libavcodec/iirfilter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/iirfilter.c b/libavcodec/iirfilter.c
index 56d17d2dd6..98366e2c08 100644
--- a/libavcodec/iirfilter.c
+++ b/libavcodec/iirfilter.c
@@ -324,7 +324,7 @@ int main(void)
     int i;
     FILE* fd;
 
-    fcoeffs = ff_iir_filter_init_coeffs(FF_FILTER_TYPE_BUTTERWORTH,
+    fcoeffs = ff_iir_filter_init_coeffs(NULL, FF_FILTER_TYPE_BUTTERWORTH,
                                         FF_FILTER_MODE_LOWPASS, FILT_ORDER,
                                         cutoff_coeff, 0.0, 0.0);
     fstate  = ff_iir_filter_init_state(FILT_ORDER);

From 7a88617c43ce534d94591dd78d4958333492b939 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 18 May 2011 00:03:19 +0200
Subject: [PATCH 066/830] configure: opensolaris install is not compatible with
 ffmpeg, allow overriding it.

ginstall works on opensolaris.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 configure | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 15f1c4293d..2ea8a02a43 100755
--- a/configure
+++ b/configure
@@ -1183,6 +1183,7 @@ CMDLINE_SET="
     host_ldflags
     host_libs
     host_os
+    install
     ld
     logfile
     malloc_prefix
@@ -1633,6 +1634,7 @@ ar_default="ar"
 cc_default="gcc"
 cc_version=\"unknown\"
 host_cc_default="gcc"
+install="install"
 ln_s="ln -sf"
 nm_default="nm"
 objformat="elf"
@@ -3351,7 +3353,7 @@ SDL_LIBS=$sdl_libs
 SDL_CFLAGS=$sdl_cflags
 LIB_INSTALL_EXTRA_CMD=$LIB_INSTALL_EXTRA_CMD
 EXTRALIBS=$extralibs
-INSTALL=install
+INSTALL=$install
 LIBTARGET=${LIBTARGET}
 SLIBNAME=${SLIBNAME}
 SLIBNAME_WITH_VERSION=${SLIBNAME_WITH_VERSION}

From b69e5ee9027ddc1820796253f2d2bcd4e6fdba00 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 17 May 2011 23:28:19 +0200
Subject: [PATCH 067/830] id3v2: add @file doxy and link to format
 documentation

---
 libavformat/id3v2.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index 95353276b5..e1958bdf4f 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -1,5 +1,4 @@
 /*
- * ID3v2 header parser
  * Copyright (c) 2003 Fabrice Bellard
  *
  * This file is part of FFmpeg.
@@ -19,6 +18,14 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+/**
+ * @file
+ * ID3v2 header parser
+ *
+ * Specifications available at:
+ * http://id3.org/Developer_Information
+ */
+
 #include "id3v2.h"
 #include "id3v1.h"
 #include "libavutil/avstring.h"

From 64be0d1edad630f5bc0f287022f5880de07915b2 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Wed, 18 May 2011 00:43:25 +0200
Subject: [PATCH 068/830] id3v2: prevent unsigned integer overflow in
 ff_id3v2_parse()

In ff_id3v2_parse(), prevent unsigned integer overflow if data length
indicator is skipped and tlen is < 4.

Fix crash decoding file Allaby_cut.mp3, fix trac issue #182.
---
 libavformat/id3v2.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index e1958bdf4f..3640b11ab1 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -255,6 +255,8 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
         next = avio_tell(s->pb) + tlen;
 
         if (tflags & ID3v2_FLAG_DATALEN) {
+            if (tlen < 4)
+                break;
             avio_rb32(s->pb);
             tlen -= 4;
         }

From 32ac63ee10ca5daa149344a75d736c1b98177392 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 10 May 2011 11:29:08 -0400
Subject: [PATCH 069/830] mdec.c: fix overread.

---
 libavcodec/mdec.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
index 545b919411..9b6e6b6dd9 100644
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -125,7 +125,8 @@ static inline int decode_mb(MDECContext *a, DCTELEM block[6][64]){
     a->dsp.clear_blocks(block[0]);
 
     for(i=0; i<6; i++){
-        if( mdec_decode_block_intra(a, block[ block_index[i] ], block_index[i]) < 0)
+        if( mdec_decode_block_intra(a, block[ block_index[i] ], block_index[i]) < 0 ||
+            get_bits_left(&a->gb) < 0)
             return -1;
     }
     return 0;

From a64c58a24023b3e8c9b30bfb6908150b57117a3b Mon Sep 17 00:00:00 2001
From: Alexander Strange <astrange@ithinksw.com>
Date: Tue, 10 May 2011 11:29:09 -0400
Subject: [PATCH 070/830] mdec: enable frame-level multithreading.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/mdec.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
index 9b6e6b6dd9..02b69d045a 100644
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -31,6 +31,7 @@
 #include "dsputil.h"
 #include "mpegvideo.h"
 #include "mpeg12.h"
+#include "thread.h"
 
 typedef struct MDECContext{
     AVCodecContext *avctx;
@@ -163,10 +164,10 @@ static int decode_frame(AVCodecContext *avctx,
     int i;
 
     if(p->data[0])
-        avctx->release_buffer(avctx, p);
+        ff_thread_release_buffer(avctx, p);
 
     p->reference= 0;
-    if(avctx->get_buffer(avctx, p) < 0){
+    if(ff_thread_get_buffer(avctx, p) < 0){
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         return -1;
     }
@@ -239,6 +240,18 @@ static av_cold int decode_init(AVCodecContext *avctx){
     return 0;
 }
 
+static av_cold int decode_init_thread_copy(AVCodecContext *avctx){
+    MDECContext * const a = avctx->priv_data;
+    AVFrame *p = (AVFrame*)&a->picture;
+    /* init_thread_copy hook: bind this context's own picture and back-pointer */
+    avctx->coded_frame = p;
+    a->avctx= avctx;
+
+    p->qscale_table = av_mallocz( a->mb_width); /* NOTE(review): result is not checked here */
+
+    return 0;
+}
+
 static av_cold int decode_end(AVCodecContext *avctx){
     MDECContext * const a = avctx->priv_data;
 
@@ -260,7 +273,8 @@ AVCodec ff_mdec_decoder = {
     NULL,
     decode_end,
     decode_frame,
-    CODEC_CAP_DR1,
+    CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
     .long_name= NULL_IF_CONFIG_SMALL("Sony PlayStation MDEC (Motion DECoder)"),
+    .init_thread_copy= ONLY_IF_THREADS_ENABLED(decode_init_thread_copy)
 };
 

From 8d44cd2cd82e27e6b051fe0606dece3b0bec0bcd Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 17 May 2011 10:26:30 -0400
Subject: [PATCH 071/830] h264: copy pixel_shift between slice threading
 contexts.

Fixes "make THREADS=2 THREAD_TYPE=2
fate-h264-conformance-frext-pph10i3_panasonic_a".
---
 libavcodec/h264.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 616d0a4804..6b262bc992 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1965,6 +1965,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
             c->h264dsp = h->h264dsp;
             c->sps = h->sps;
             c->pps = h->pps;
+            c->pixel_shift = h->pixel_shift;
             init_scan_tables(c);
             clone_tables(c, h, i);
         }

From 508a24f8dc63e74bd9917e6f0c4cdbb744741ef0 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 17 May 2011 10:26:29 -0400
Subject: [PATCH 072/830] mpeg12: add slice-threading checks to slice-threading
 initializers.

Fixes "make THREADS=2 THREAD_TYPE=1 fate-mpeg2-field-enc".
---
 libavcodec/mpeg12.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index ffe6b94c05..88ed5332ae 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -2262,7 +2262,7 @@ static int decode_chunks(AVCodecContext *avctx,
         buf_ptr = ff_find_start_code(buf_ptr,buf_end, &start_code);
         if (start_code > 0x1ff){
             if(s2->pict_type != AV_PICTURE_TYPE_B || avctx->skip_frame <= AVDISCARD_DEFAULT){
-                if(avctx->thread_count > 1){
+                if((avctx->active_thread_type & FF_THREAD_SLICE) && avctx->thread_count > 1){
                     int i;
 
                     avctx->execute(avctx, slice_decode_thread,  &s2->thread_context[0], NULL, s->slice_count, sizeof(void*));
@@ -2430,7 +2430,7 @@ static int decode_chunks(AVCodecContext *avctx,
                     break;
                 }
 
-                if(avctx->thread_count > 1){
+                if((avctx->active_thread_type & FF_THREAD_SLICE) && avctx->thread_count > 1){
                     int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count;
                     if(threshold <= mb_y){
                         MpegEncContext *thread_context= s2->thread_context[s->slice_count];
@@ -2541,7 +2541,7 @@ AVCodec ff_mpegvideo_decoder = {
 
 #if CONFIG_MPEG_XVMC_DECODER
 static av_cold int mpeg_mc_decode_init(AVCodecContext *avctx){
-    if( avctx->thread_count > 1)
+    if((avctx->active_thread_type & FF_THREAD_SLICE) && avctx->thread_count > 1)
         return -1;
     if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) )
         return -1;

From dede36bd024fb23da244dbfab32b7ef18ca51c82 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 17 May 2011 20:39:09 +0100
Subject: [PATCH 073/830] mpegaudio: sanitise compute_antialias_* names

This makes the compute_antialias functions use the same naming
convention as everything else.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudiodec.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 77ecb44c9e..4802a04bc8 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -41,7 +41,6 @@
 
 #if CONFIG_FLOAT
 #   define SHR(a,b)       ((a)*(1.0f/(1<<(b))))
-#   define compute_antialias compute_antialias_float
 #   define FIXR_OLD(a)    ((int)((a) * FRAC_ONE + 0.5))
 #   define FIXR(x)        ((float)(x))
 #   define FIXHR(x)       ((float)(x))
@@ -51,7 +50,6 @@
 #   define OUT_FMT AV_SAMPLE_FMT_FLT
 #else
 #   define SHR(a,b)       ((a)>>(b))
-#   define compute_antialias compute_antialias_integer
 /* WARNING: only correct for posititive numbers */
 #   define FIXR_OLD(a)    ((int)((a) * FRAC_ONE + 0.5))
 #   define FIXR(a)        ((int)((a) * FRAC_ONE + 0.5))
@@ -69,7 +67,7 @@
 #include "mpegaudiodata.h"
 #include "mpegaudiodectab.h"
 
-static void compute_antialias(MPADecodeContext *s, GranuleDef *g);
+static void RENAME(compute_antialias)(MPADecodeContext *s, GranuleDef *g);
 static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
                                int *dither_state, OUT_INT *samples, int incr);
 
@@ -1480,8 +1478,7 @@ static void compute_stereo(MPADecodeContext *s,
 }
 
 #if !CONFIG_FLOAT
-static void compute_antialias_integer(MPADecodeContext *s,
-                              GranuleDef *g)
+static void compute_antialias_fixed(MPADecodeContext *s, GranuleDef *g)
 {
     int32_t *ptr, *csa;
     int n, i;
@@ -1848,7 +1845,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
             g = &s->granules[ch][gr];
 
             reorder_block(s, g);
-            compute_antialias(s, g);
+            RENAME(compute_antialias)(s, g);
             compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
         }
     } /* gr */

From 70378ea1902b57ccb221157fbd3a411d11046ad0 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 18 May 2011 13:05:39 +0100
Subject: [PATCH 074/830] fate: run aref and vref as regular tests

These tests create reference files used for psnr calculation in
the other codec tests.  Treating them as (mostly) regular tests
simplifies the makefile and makes them visible in the fate reports.
The latter makes errors in these runs easier to identify.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile                  | 16 +++++++---------
 tests/codec-regression.sh |  4 ++--
 tests/ref/acodec/aref     |  2 ++
 tests/ref/vsynth1/vref    |  2 ++
 tests/ref/vsynth2/vref    |  2 ++
 5 files changed, 15 insertions(+), 11 deletions(-)
 create mode 100644 tests/ref/acodec/aref
 create mode 100644 tests/ref/vsynth1/vref
 create mode 100644 tests/ref/vsynth2/vref

diff --git a/Makefile b/Makefile
index d69004b474..a155eea58b 100644
--- a/Makefile
+++ b/Makefile
@@ -187,18 +187,16 @@ lavftest:  fate-lavf
 lavfitest: fate-lavfi
 seektest:  fate-seek
 
-AREF = tests/data/acodec.ref.wav
-VREF = tests/data/vsynth1.ref.yuv
+AREF = fate-acodec-aref
+VREF = fate-vsynth1-vref fate-vsynth2-vref
 REFS = $(AREF) $(VREF)
 
-$(REFS): TAG = GEN
-
 $(VREF): ffmpeg$(EXESUF) tests/vsynth1/00.pgm tests/vsynth2/00.pgm
-	$(M)$(SRC_PATH)/tests/codec-regression.sh vref vsynth1 tests/vsynth1 "$(TARGET_EXEC)" "$(TARGET_PATH)"
-	$(Q)$(SRC_PATH)/tests/codec-regression.sh vref vsynth2 tests/vsynth2 "$(TARGET_EXEC)" "$(TARGET_PATH)"
-
 $(AREF): ffmpeg$(EXESUF) tests/data/asynth1.sw
-	$(M)$(SRC_PATH)/tests/codec-regression.sh aref acodec tests/acodec "$(TARGET_EXEC)" "$(TARGET_PATH)"
+
+fate-acodec-aref:  CMD = codectest acodec
+fate-vsynth1-vref: CMD = codectest vsynth1
+fate-vsynth2-vref: CMD = codectest vsynth2
 
 ffservertest: ffserver$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw
 	@echo
@@ -287,7 +285,7 @@ FATE_UTILS = base64 tiny_psnr
 
 fate: $(FATE)
 
-$(FATE): ffmpeg$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
+$(FATE) $(REFS): ffmpeg$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
 	@echo "TEST    $(@:fate-%=%)"
 	$(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)'
 
diff --git a/tests/codec-regression.sh b/tests/codec-regression.sh
index 2ee6166c33..9b2aa6308d 100755
--- a/tests/codec-regression.sh
+++ b/tests/codec-regression.sh
@@ -16,10 +16,10 @@ rm -f "$benchfile"
 
 # generate reference for quality check
 if [ -n "$do_vref" ]; then
-do_ffmpeg_nocheck $raw_ref -f image2 -vcodec pgmyuv -i $raw_src -an -f rawvideo $target_path/$raw_ref
+do_ffmpeg $raw_ref -f image2 -vcodec pgmyuv -i $raw_src -an -f rawvideo
 fi
 if [ -n "$do_aref" ]; then
-do_ffmpeg_nocheck $pcm_ref -ab 128k -ac 2 -ar 44100 -f s16le -i $pcm_src -f wav $target_path/$pcm_ref
+do_ffmpeg $pcm_ref -ab 128k -ac 2 -ar 44100 -f s16le -i $pcm_src -f wav
 fi
 
 if [ -n "$do_mpeg" ] ; then
diff --git a/tests/ref/acodec/aref b/tests/ref/acodec/aref
new file mode 100644
index 0000000000..8e6773be3b
--- /dev/null
+++ b/tests/ref/acodec/aref
@@ -0,0 +1,2 @@
+95e54b261530a1bcf6de6fe3b21dc5f6 *./tests/data/acodec.ref.wav
+1058444 ./tests/data/acodec.ref.wav
diff --git a/tests/ref/vsynth1/vref b/tests/ref/vsynth1/vref
new file mode 100644
index 0000000000..2defdac870
--- /dev/null
+++ b/tests/ref/vsynth1/vref
@@ -0,0 +1,2 @@
+c5ccac874dbf808e9088bc3107860042 *./tests/data/vsynth1.ref.yuv
+7603200 ./tests/data/vsynth1.ref.yuv
diff --git a/tests/ref/vsynth2/vref b/tests/ref/vsynth2/vref
new file mode 100644
index 0000000000..8f83b6c7ba
--- /dev/null
+++ b/tests/ref/vsynth2/vref
@@ -0,0 +1,2 @@
+dde5895817ad9d219f79a52d0bdfb001 *./tests/data/vsynth2.ref.yuv
+7603200 ./tests/data/vsynth2.ref.yuv

From a06bf6368be2acb760a10289645eba1f65bf967b Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 16:37:38 +0100
Subject: [PATCH 075/830] mpegaudiodec: remove decode_end() function

This function is not needed since 721d6f2dc5 removed the DCT table
allocations for the configuration used here.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudiodec_float.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/libavcodec/mpegaudiodec_float.c b/libavcodec/mpegaudiodec_float.c
index e9041fcb63..0ef85d19c1 100644
--- a/libavcodec/mpegaudiodec_float.c
+++ b/libavcodec/mpegaudiodec_float.c
@@ -80,13 +80,6 @@ static void compute_antialias_float(MPADecodeContext *s,
     }
 }
 
-static av_cold int decode_end(AVCodecContext * avctx)
-{
-    MPADecodeContext *s = avctx->priv_data;
-    ff_dct_end(&s->dct);
-    return 0;
-}
-
 #if CONFIG_MP1FLOAT_DECODER
 AVCodec ff_mp1float_decoder =
 {
@@ -96,7 +89,7 @@ AVCodec ff_mp1float_decoder =
     sizeof(MPADecodeContext),
     decode_init,
     NULL,
-    decode_end,
+    .close = NULL,
     decode_frame,
     CODEC_CAP_PARSE_ONLY,
     .flush= flush,
@@ -112,7 +105,7 @@ AVCodec ff_mp2float_decoder =
     sizeof(MPADecodeContext),
     decode_init,
     NULL,
-    decode_end,
+    .close = NULL,
     decode_frame,
     CODEC_CAP_PARSE_ONLY,
     .flush= flush,
@@ -128,7 +121,7 @@ AVCodec ff_mp3float_decoder =
     sizeof(MPADecodeContext),
     decode_init,
     NULL,
-    decode_end,
+    .close = NULL,
     decode_frame,
     CODEC_CAP_PARSE_ONLY,
     .flush= flush,
@@ -144,7 +137,7 @@ AVCodec ff_mp3adufloat_decoder =
     sizeof(MPADecodeContext),
     decode_init,
     NULL,
-    decode_end,
+    .close = NULL,
     decode_frame_adu,
     CODEC_CAP_PARSE_ONLY,
     .flush= flush,

From d4a7df423c7eda78185d79fb81ffe36b3d4253c4 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 18 May 2011 15:05:26 +0100
Subject: [PATCH 076/830] fate: do not collect -benchmark output

The old regtest scripts pass -benchmark and collect the utime values.
As these values are never used, this machinery can be removed.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/codec-regression.sh |  1 -
 tests/lavf-regression.sh  |  1 -
 tests/lavfi-regression.sh |  1 -
 tests/regression-funcs.sh | 17 ++++-------------
 4 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/tests/codec-regression.sh b/tests/codec-regression.sh
index 9b2aa6308d..335e1f70f7 100755
--- a/tests/codec-regression.sh
+++ b/tests/codec-regression.sh
@@ -12,7 +12,6 @@ set -e
 eval do_$test=y
 
 rm -f "$logfile"
-rm -f "$benchfile"
 
 # generate reference for quality check
 if [ -n "$do_vref" ]; then
diff --git a/tests/lavf-regression.sh b/tests/lavf-regression.sh
index 28f53f78b0..32d92de0b9 100755
--- a/tests/lavf-regression.sh
+++ b/tests/lavf-regression.sh
@@ -44,7 +44,6 @@ do_audio_only()
 }
 
 rm -f "$logfile"
-rm -f "$benchfile"
 
 if [ -n "$do_avi" ] ; then
 do_lavf avi
diff --git a/tests/lavfi-regression.sh b/tests/lavfi-regression.sh
index 17988192df..b2a195a90e 100755
--- a/tests/lavfi-regression.sh
+++ b/tests/lavfi-regression.sh
@@ -12,7 +12,6 @@ set -e
 eval do_$test=y
 
 rm -f "$logfile"
-rm -f "$benchfile"
 
 do_video_filter() {
     label=$1
diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh
index 0e4ea44f46..035cbdb2f3 100755
--- a/tests/regression-funcs.sh
+++ b/tests/regression-funcs.sh
@@ -23,9 +23,6 @@ errfile="$datadir/$this.err"
 # various files
 ffmpeg="$target_exec ${target_path}/ffmpeg"
 tiny_psnr="tests/tiny_psnr"
-benchfile="$datadir/$this.bench"
-bench="$datadir/$this.bench.tmp"
-bench2="$datadir/$this.bench2.tmp"
 raw_src="${target_path}/$raw_src_dir/%02d.pgm"
 raw_dst="$datadir/$this.out.yuv"
 raw_ref="$datadir/$test_ref.ref.yuv"
@@ -35,7 +32,7 @@ pcm_ref="$datadir/$test_ref.ref.wav"
 crcfile="$datadir/$this.crc"
 target_crcfile="$target_datadir/$this.crc"
 
-cleanfiles="$raw_dst $pcm_dst $crcfile $bench $bench2"
+cleanfiles="$raw_dst $pcm_dst $crcfile"
 trap 'rm -f -- $cleanfiles' EXIT
 
 mkdir -p "$datadir"
@@ -69,7 +66,7 @@ do_ffmpeg()
     f="$1"
     shift
     set -- $* ${target_path}/$f
-    run_ffmpeg -benchmark $* > $bench
+    run_ffmpeg $*
     do_md5sum $f >> $logfile
     if [ $f = $raw_dst ] ; then
         $tiny_psnr $f $raw_ref >> $logfile
@@ -78,8 +75,6 @@ do_ffmpeg()
     else
         wc -c $f >> $logfile
     fi
-    expr "$(cat $bench)" : '.*utime=\(.*s\)' > $bench2
-    echo $(cat $bench2) $f >> $benchfile
 }
 
 do_ffmpeg_nomd5()
@@ -87,7 +82,7 @@ do_ffmpeg_nomd5()
     f="$1"
     shift
     set -- $* ${target_path}/$f
-    run_ffmpeg -benchmark $* > $bench
+    run_ffmpeg $*
     if [ $f = $raw_dst ] ; then
         $tiny_psnr $f $raw_ref >> $logfile
     elif [ $f = $pcm_dst ] ; then
@@ -95,8 +90,6 @@ do_ffmpeg_nomd5()
     else
         wc -c $f >> $logfile
     fi
-    expr "$(cat $bench)" : '.*utime=\(.*s\)' > $bench2
-    echo $(cat $bench2) $f >> $benchfile
 }
 
 do_ffmpeg_crc()
@@ -111,9 +104,7 @@ do_ffmpeg_nocheck()
 {
     f="$1"
     shift
-    run_ffmpeg -benchmark $* > $bench
-    expr "$(cat $bench)" : '.*utime=\(.*s\)' > $bench2
-    echo $(cat $bench2) $f >> $benchfile
+    run_ffmpeg $*
 }
 
 do_video_decoding()

From 1a14a27603fec02498627b6f92cea7bd57c8d41f Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 18 May 2011 15:13:58 +0100
Subject: [PATCH 077/830] fate: remove do_ffmpeg_nocheck function

This function is essentially an alias for run_ffmpeg and is only
used in one place.  This patch removes the function and replaces
the call with the equivalent (simpler) run_ffmpeg call.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/lavf-regression.sh  | 4 ++--
 tests/regression-funcs.sh | 7 -------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/tests/lavf-regression.sh b/tests/lavf-regression.sh
index 32d92de0b9..94d258334b 100755
--- a/tests/lavf-regression.sh
+++ b/tests/lavf-regression.sh
@@ -226,8 +226,8 @@ conversions="yuv420p yuv422p yuv444p yuyv422 yuv410p yuv411p yuvj420p \
              monob yuv440p yuvj440p"
 for pix_fmt in $conversions ; do
     file=${outfile}${pix_fmt}.yuv
-    do_ffmpeg_nocheck $file $DEC_OPTS -r 1 -t 1 -f image2 -vcodec pgmyuv -i $raw_src \
-                            $ENC_OPTS -f rawvideo -s 352x288 -pix_fmt $pix_fmt $target_path/$raw_dst
+    run_ffmpeg $DEC_OPTS -r 1 -t 1 -f image2 -vcodec pgmyuv -i $raw_src \
+               $ENC_OPTS -f rawvideo -s 352x288 -pix_fmt $pix_fmt $target_path/$raw_dst
     do_ffmpeg $file $DEC_OPTS -f rawvideo -s 352x288 -pix_fmt $pix_fmt -i $target_path/$raw_dst \
                     $ENC_OPTS -f rawvideo -s 352x288 -pix_fmt yuv444p
 done
diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh
index 035cbdb2f3..b79c258e77 100755
--- a/tests/regression-funcs.sh
+++ b/tests/regression-funcs.sh
@@ -100,13 +100,6 @@ do_ffmpeg_crc()
     echo "$f $(cat $crcfile)" >> $logfile
 }
 
-do_ffmpeg_nocheck()
-{
-    f="$1"
-    shift
-    run_ffmpeg $*
-}
-
 do_video_decoding()
 {
     do_ffmpeg $raw_dst $DEC_OPTS $1 -i $target_path/$file -f rawvideo $ENC_OPTS $2

From e89403216cc15563a7f701a9e6547abf2b8ad7a9 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 18 May 2011 13:58:13 +0200
Subject: [PATCH 078/830] ffmpeg: don't show_banner() on verbose<0

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffmpeg.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index ca44c82b86..90c7d71d2f 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -4539,7 +4539,8 @@ int main(int argc, char **argv)
 
     init_opts();
 
-    show_banner();
+    if(verbose>=0)
+        show_banner();
 
     /* parse options */
     parse_options(argc, argv, options, opt_output_file);

From d1f9621d39b80add240d09daf1328ceae613bcd4 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 18 May 2011 15:39:40 +0200
Subject: [PATCH 079/830] libx264: support passing arbitrary parameters.

Idea taken from mencoder. This should fix conflicts with presets as long as
the new system is used.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/libx264.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index eae21fe2bb..519dc511ec 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -42,6 +42,7 @@ typedef struct X264Context {
     int fastfirstpass;
     char *stats;
     char *weightp;
+    char *x264opts;
 } X264Context;
 
 static void X264_log(void *p, int level, const char *fmt, va_list args)
@@ -344,6 +345,17 @@ static av_cold int X264_init(AVCodecContext *avctx)
 
     OPT_STR("level", x4->level);
 
+    if(x4->x264opts){
+        const char *p= x4->x264opts;
+        while(p){
+            char param[256]={0}, val[256]={0};
+            sscanf(p, "%255[^:=]=%255[^:]", param, val);
+            OPT_STR(param, val);
+            p= strchr(p, ':');
+            p+=!!p;
+        }
+    }
+
     if (x4->fastfirstpass)
         x264_param_apply_fastfirstpass(&x4->params);
 
@@ -416,6 +428,7 @@ static const AVOption options[] = {
     {"level", "Specify level (as defined by Annex A)", OFFSET(level), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
     {"passlogfile", "Filename for 2 pass stats", OFFSET(stats), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
     {"wpredp", "Weighted prediction for P-frames", OFFSET(weightp), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
+    {"x264opts", "x264 options", OFFSET(x264opts), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
     { NULL },
 };
 

From e3ff6e8d0fb222a8fce7c747781258e6f5e47a70 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 18 May 2011 16:36:08 +0200
Subject: [PATCH 080/830] configure: favor pkg_config over sdl_config

This fixes linking issues on ubuntu.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 configure | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/configure b/configure
index 0e7e94c0e7..41a66d7ab4 100755
--- a/configure
+++ b/configure
@@ -2943,19 +2943,19 @@ if enabled libdc1394; then
 fi
 
 SDL_CONFIG="${cross_prefix}sdl-config"
-if "${SDL_CONFIG}" --version > /dev/null 2>&1; then
+if check_pkg_config sdl SDL_version.h SDL_Linked_Version; then
+    check_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) >= 0x010201" $sdl_cflags &&
+    enable sdl &&
+    check_struct SDL.h SDL_VideoInfo current_w $sdl_cflags && enable sdl_video_size
+else
+  if "${SDL_CONFIG}" --version > /dev/null 2>&1; then
     sdl_cflags=$("${SDL_CONFIG}" --cflags)
     sdl_libs=$("${SDL_CONFIG}" --libs)
     check_func_headers SDL_version.h SDL_Linked_Version $sdl_cflags $sdl_libs &&
     check_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) >= 0x010201" $sdl_cflags &&
     enable sdl &&
     check_struct SDL.h SDL_VideoInfo current_w $sdl_cflags && enable sdl_video_size
-else
-    if check_pkg_config sdl SDL_version.h SDL_Linked_Version; then
-        check_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) >= 0x010201" $sdl_cflags &&
-        enable sdl &&
-        check_struct SDL.h SDL_VideoInfo current_w $sdl_cflags && enable sdl_video_size
-    fi
+  fi
 fi
 
 texi2html -version > /dev/null 2>&1 && enable texi2html || disable texi2html

From 8146d16b60ec84aae93cfc4a9444d879f9d5bbfc Mon Sep 17 00:00:00 2001
From: JULIAN GARDNER <joolzg@btinternet.com>
Date: Wed, 18 May 2011 16:51:20 +0200
Subject: [PATCH 081/830] cleaned up the udp.c, removed some variables and an
 av_log

---
 libavformat/udp.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libavformat/udp.c b/libavformat/udp.c
index 802cbd5975..8c8c195fc4 100644
--- a/libavformat/udp.c
+++ b/libavformat/udp.c
@@ -57,7 +57,6 @@ typedef struct {
     /* Circular Buffer variables for use in UDP receive code */
     int circular_buffer_size;
     AVFifoBuffer *fifo;
-    int circular_buffer_available_max;
     int circular_buffer_error;
     pthread_t circular_buffer_thread;
 } UDPContext;
@@ -529,7 +528,6 @@ static int udp_read(URLContext *h, uint8_t *buf, int size)
     UDPContext *s = h->priv_data;
     int ret;
     int avail;
-    int left;
     fd_set rfds;
     struct timeval tv;
 
@@ -594,7 +592,6 @@ static int udp_close(URLContext *h)
     if (s->is_multicast && (h->flags & AVIO_FLAG_READ))
         udp_leave_multicast_group(s->udp_fd, (struct sockaddr *)&s->dest_addr);
     closesocket(s->udp_fd);
-    av_log( h, AV_LOG_INFO, "circular_buffer_info max:%d%%\r\n", (s->circular_buffer_available_max*100)/s->circular_buffer_size);
     av_fifo_free(s->fifo);
     av_free(s);
     return 0;

From 420ebd1850de594cfdbb573237ba9e2df39946ad Mon Sep 17 00:00:00 2001
From: Compn <tempn@twmi.rr.com>
Date: Wed, 18 May 2011 11:50:34 -0400
Subject: [PATCH 082/830] add x264opts entry to docs

---
 doc/ffmpeg.texi | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 62b8996f02..70ad6c5551 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -312,6 +312,13 @@ the input video.
 Use the option "-filters" to show all the available filters (including
 also sources and sinks).
 
+@item -x264opts @var{option}
+Allows you to set any x264 option, see x264 manual for a list.
+@example
+ffmpeg -i foo.mpg -vcodec libx264 -x264opts keyint=123:min-keyint=20 -an out.mkv
+@end example
+
+
 @end table
 
 @section Advanced Video Options

From 93df511ec78a190a35b66f22e3a151f1b646a19a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <tomas.hardin@codemill.se>
Date: Wed, 18 May 2011 13:47:23 +0200
Subject: [PATCH 083/830] Use avio_tell() instead of url_ftell()

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/wav.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index 6b1e574a6e..1296ccb0e7 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -252,7 +252,7 @@ static int wav_read_header(AVFormatContext *s,
         if (url_feof(pb))
             return -1;
         size = next_tag(pb, &tag);
-        next_tag_ofs = url_ftell(pb) + size;
+        next_tag_ofs = avio_tell(pb) + size;
 
         if (tag == MKTAG('f', 'm', 't', ' ')) {
             /* only parse the first 'fmt ' tag found */

From a06d238e8cc44ea970ebb2b671389c78bc5bb251 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <tomas.hardin@codemill.se>
Date: Thu, 17 Feb 2011 15:58:10 +0100
Subject: [PATCH 084/830] Refactor the tag checking into a switch statement

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/wav.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index 1296ccb0e7..b3067cbdef 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -254,7 +254,8 @@ static int wav_read_header(AVFormatContext *s,
         size = next_tag(pb, &tag);
         next_tag_ofs = avio_tell(pb) + size;
 
-        if (tag == MKTAG('f', 'm', 't', ' ')) {
+        switch (tag) {
+        case MKTAG('f', 'm', 't', ' '):
             /* only parse the first 'fmt ' tag found */
             if (!got_fmt && (ret = wav_parse_fmt_tag(s, size, &st) < 0)) {
                 return ret;
@@ -262,18 +263,22 @@ static int wav_read_header(AVFormatContext *s,
                 av_log(s, AV_LOG_WARNING, "found more than one 'fmt ' tag\n");
 
             got_fmt = 1;
-        } else if (tag == MKTAG('d', 'a', 't', 'a')) {
+            break;
+        case MKTAG('d', 'a', 't', 'a'):
             if (!got_fmt) {
                 av_log(s, AV_LOG_ERROR, "found no 'fmt ' tag before the 'data' tag\n");
                 return AVERROR_INVALIDDATA;
             }
 
-            break;
-        }else if (tag == MKTAG('f','a','c','t') && !sample_count){
+            goto break_loop;
+        case MKTAG('f','a','c','t'):
+            if(!sample_count)
             sample_count = avio_rl32(pb);
+            break;
         }
         avio_seek(pb, next_tag_ofs, SEEK_SET);
     }
+break_loop:
     if (rf64)
         size = data_size;
     if (size < 0)

From 6629dbd299717169c2dc8cf9218cf98e43322fb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <tomas.hardin@codemill.se>
Date: Wed, 18 May 2011 13:48:02 +0200
Subject: [PATCH 085/830] Keep parsing wav until EOF if the input is seekable
 and we know the size of the data tag

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/wav.c | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index b3067cbdef..bfc93d566e 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -216,7 +216,7 @@ static int wav_read_header(AVFormatContext *s,
     AVStream *st;
     WAVContext *wav = s->priv_data;
     int ret, got_fmt = 0;
-    int64_t next_tag_ofs;
+    int64_t next_tag_ofs, data_ofs = -1;
 
     /* check RIFF header */
     tag = avio_rl32(pb);
@@ -247,13 +247,19 @@ static int wav_read_header(AVFormatContext *s,
         avio_skip(pb, size - 16); /* skip rest of ds64 chunk */
     }
 
-
     for (;;) {
-        if (url_feof(pb))
-            return -1;
         size = next_tag(pb, &tag);
         next_tag_ofs = avio_tell(pb) + size;
 
+        if (url_feof(pb)) {
+            if (data_ofs < 0) {
+                av_log(s, AV_LOG_ERROR, "no 'data' tag found\n");
+                return AVERROR_INVALIDDATA;
+            }
+
+            break;
+        }
+
         switch (tag) {
         case MKTAG('f', 'm', 't', ' '):
             /* only parse the first 'fmt ' tag found */
@@ -270,7 +276,21 @@ static int wav_read_header(AVFormatContext *s,
                 return AVERROR_INVALIDDATA;
             }
 
+            if (rf64) {
+                next_tag_ofs = wav->data_end = avio_tell(pb) + data_size;
+            } else {
+                data_size = size;
+                wav->data_end = size ? next_tag_ofs : INT64_MAX;
+            }
+
+            /* don't look for footer metadata if we can't seek or if we don't
+             * know where the data tag ends
+             */
+            if (!pb->seekable || (!rf64 && !size))
             goto break_loop;
+
+            data_ofs = avio_tell(pb);
+            break;
         case MKTAG('f','a','c','t'):
             if(!sample_count)
             sample_count = avio_rl32(pb);
@@ -279,17 +299,11 @@ static int wav_read_header(AVFormatContext *s,
         avio_seek(pb, next_tag_ofs, SEEK_SET);
     }
 break_loop:
-    if (rf64)
-        size = data_size;
-    if (size < 0)
-        return -1;
-    if (!size) {
-        wav->data_end = INT64_MAX;
-    } else
-        wav->data_end= avio_tell(pb) + size;
+    if (data_ofs >= 0)
+        avio_seek(pb, data_ofs, SEEK_SET);
 
     if (!sample_count && st->codec->channels && av_get_bits_per_sample(st->codec->codec_id))
-        sample_count = (size<<3) / (st->codec->channels * (uint64_t)av_get_bits_per_sample(st->codec->codec_id));
+        sample_count = (data_size<<3) / (st->codec->channels * (uint64_t)av_get_bits_per_sample(st->codec->codec_id));
     if (sample_count)
         st->duration = sample_count;
     return 0;

From 40612f260160014165f045d66da1d7e35ce857cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <tomas.hardin@codemill.se>
Date: Thu, 3 Mar 2011 16:41:11 +0100
Subject: [PATCH 086/830] Cosmetics: indent

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/wav.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index bfc93d566e..2d6eea2ed9 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -287,13 +287,13 @@ static int wav_read_header(AVFormatContext *s,
              * know where the data tag ends
              */
             if (!pb->seekable || (!rf64 && !size))
-            goto break_loop;
+                goto break_loop;
 
             data_ofs = avio_tell(pb);
             break;
         case MKTAG('f','a','c','t'):
             if(!sample_count)
-            sample_count = avio_rl32(pb);
+                sample_count = avio_rl32(pb);
             break;
         }
         avio_seek(pb, next_tag_ofs, SEEK_SET);

From 7e5cb2ef51650d1c3050cfa09bf4496813df5e57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <tomas.hardin@codemill.se>
Date: Wed, 18 May 2011 13:48:39 +0200
Subject: [PATCH 087/830] Parse 'bext' metadata in the wav demuxer

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/wav.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index 2d6eea2ed9..9f7b3c75f9 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -204,6 +204,86 @@ static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st)
     return 0;
 }
 
+/* Read a fixed-width bext text field of 'length' (<= 256) bytes; store it under 'key' unless it is empty. */
+static inline int wav_parse_bext_string(AVFormatContext *s, const char *key, int length)
+{
+    char temp[257];
+    int ret;
+
+    if ((ret = avio_read(s->pb, temp, length)) < 0)
+        return ret;
+
+    /* terminate at the bytes actually read: after a short read temp[ret..length-1] is uninitialized */
+    temp[ret] = 0;
+    if (temp[0])
+        return av_metadata_set2(&s->metadata, key, temp, 0);
+    return 0;
+}
+
+/* Parse a 'bext' chunk of 'size' bytes at the current position of s->pb into s->metadata; returns 0 or a negative error code. */
+static int wav_parse_bext_tag(AVFormatContext *s, int64_t size)
+{
+    char temp[131], *coding_history;
+    int ret, x;
+    uint64_t time_reference, umid_parts[8], umid_mask = 0;
+
+    if ((ret = wav_parse_bext_string(s, "description", 256)) < 0 ||
+        (ret = wav_parse_bext_string(s, "originator", 32)) < 0 ||
+        (ret = wav_parse_bext_string(s, "originator_reference", 32)) < 0 ||
+        (ret = wav_parse_bext_string(s, "origination_date", 10)) < 0 ||
+        (ret = wav_parse_bext_string(s, "origination_time", 8)) < 0)
+        return ret;
+
+    time_reference = avio_rl64(s->pb);
+    /* PRIu64 rather than %lu: unsigned long is only 32 bits wide on some targets */
+    snprintf(temp, sizeof(temp), "%"PRIu64, time_reference);
+    if ((ret = av_metadata_set2(&s->metadata, "time_reference", temp, 0)) < 0)
+        return ret;
+
+    /* check if version is >= 1, in which case an UMID may be present */
+    if (avio_rl16(s->pb) >= 1) {
+        for (x = 0; x < 8; x++)
+            umid_mask |= umid_parts[x] = avio_rb64(s->pb);
+
+        if (umid_mask) {
+            /* the string formatting below is per SMPTE 330M-2004 Annex C */
+            if (umid_parts[4] == 0 && umid_parts[5] == 0 && umid_parts[6] == 0 && umid_parts[7] == 0) {
+                /* basic UMID */
+                snprintf(temp, sizeof(temp), "0x%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64,
+                         umid_parts[0], umid_parts[1], umid_parts[2], umid_parts[3]);
+            } else {
+                /* extended UMID */
+                snprintf(temp, sizeof(temp), "0x%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64
+                                             "%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64,
+                         umid_parts[0], umid_parts[1], umid_parts[2], umid_parts[3],
+                         umid_parts[4], umid_parts[5], umid_parts[6], umid_parts[7]);
+            }
+            if ((ret = av_metadata_set2(&s->metadata, "umid", temp, 0)) < 0)
+                return ret;
+        }
+        avio_skip(s->pb, 190);
+    } else
+        avio_skip(s->pb, 254);
+
+    if (size > 602) {
+        /* CodingHistory present */
+        size -= 602;
+        if (!(coding_history = av_malloc(size+1)))
+            return AVERROR(ENOMEM);
+        if ((ret = avio_read(s->pb, coding_history, size)) < 0) {
+            av_free(coding_history); /* do not leak the buffer when the read fails */
+            return ret;
+        }
+
+        coding_history[ret] = 0; /* terminate at the bytes actually read, not at the requested size */
+        if ((ret = av_metadata_set2(&s->metadata, "coding_history", coding_history,
+                                    AV_METADATA_DONT_STRDUP_VAL)) < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
 /* wav input */
 static int wav_read_header(AVFormatContext *s,
                            AVFormatParameters *ap)
@@ -295,6 +375,10 @@ static int wav_read_header(AVFormatContext *s,
             if(!sample_count)
                 sample_count = avio_rl32(pb);
             break;
+        case MKTAG('b','e','x','t'):
+            if ((ret = wav_parse_bext_tag(s, size)) < 0)
+                return ret;
+            break;
         }
         avio_seek(pb, next_tag_ofs, SEEK_SET);
     }

From 164c24c858047f304a81278615409503c6a7da17 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 18 May 2011 17:23:47 +0100
Subject: [PATCH 088/830] fate: remove redundant aref and vref references

The ref targets are included in the FATE_[AV]CODEC lists created
by configure so they do not need to be listed separately in the
makefile.  Filter them out when setting dependencies to avoid make
warnings about circular deps.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index a155eea58b..0ed6207961 100644
--- a/Makefile
+++ b/Makefile
@@ -194,10 +194,6 @@ REFS = $(AREF) $(VREF)
 $(VREF): ffmpeg$(EXESUF) tests/vsynth1/00.pgm tests/vsynth2/00.pgm
 $(AREF): ffmpeg$(EXESUF) tests/data/asynth1.sw
 
-fate-acodec-aref:  CMD = codectest acodec
-fate-vsynth1-vref: CMD = codectest vsynth1
-fate-vsynth2-vref: CMD = codectest vsynth2
-
 ffservertest: ffserver$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw
 	@echo
 	@echo "Unfortunately ffserver is broken and therefore its regression"
@@ -250,8 +246,8 @@ FATE = $(FATE_ACODEC)                                                   \
        $(FATE_LAVFI)                                                    \
        $(FATE_SEEK)                                                     \
 
-$(FATE_ACODEC): $(AREF)
-$(FATE_VCODEC): $(VREF)
+$(filter-out %-aref,$(FATE_ACODEC)): $(AREF)
+$(filter-out %-vref,$(FATE_VCODEC)): $(VREF)
 $(FATE_LAVF):   $(REFS)
 $(FATE_LAVFI):  $(REFS) tools/lavfi-showfiltfmts$(EXESUF)
 $(FATE_SEEK):   fate-codec fate-lavf tests/seek_test$(EXESUF)
@@ -285,7 +281,7 @@ FATE_UTILS = base64 tiny_psnr
 
 fate: $(FATE)
 
-$(FATE) $(REFS): ffmpeg$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
+$(FATE): ffmpeg$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
 	@echo "TEST    $(@:fate-%=%)"
 	$(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)'
 

From f907ad9b85d5e08e4a024e24734181940cd4fc48 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 18 May 2011 17:13:15 +0100
Subject: [PATCH 089/830] Remove unused make variable SEEK_REFFILE

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0ed6207961..afd783da5b 100644
--- a/Makefile
+++ b/Makefile
@@ -180,7 +180,6 @@ check: test checkheaders
 fulltest test: codectest lavftest lavfitest seektest
 
 FFSERVER_REFFILE = $(SRC_PATH)/tests/ffserver.regression.ref
-SEEK_REFFILE     = $(SRC_PATH)/tests/seek.regression.ref
 
 codectest: fate-codec
 lavftest:  fate-lavf

From 32f8fb8ecf8178b9c9ec8d7152f1fdd8537f7f3a Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Sun, 24 Apr 2011 17:50:17 -0400
Subject: [PATCH 090/830] Add float_interleave() to FmtConvertContext with
 x86-optimized versions.

Partially based on patches by clsid2 in ffdshow-tryout.
ff_float_interleave6() x86 improvements by Loren Merritt.
---
 libavcodec/fmtconvert.c         |  20 +++++
 libavcodec/fmtconvert.h         |   9 ++
 libavcodec/x86/fmtconvert.asm   | 141 ++++++++++++++++++++++++++++++++
 libavcodec/x86/fmtconvert_mmx.c |  30 +++++++
 4 files changed, 200 insertions(+)

diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index e9707555af..58fece70b2 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -56,11 +56,31 @@ static void float_to_int16_interleave_c(int16_t *dst, const float **src,
     }
 }
 
+void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
+                           int channels) /* interleave len samples from each src[c] into dst */
+{
+    int j, c;
+    unsigned int i;
+    if (channels == 2) { /* stereo: write both channels of sample i in one iteration */
+        for (i = 0; i < len; i++) {
+            dst[2*i]   = src[0][i];
+            dst[2*i+1] = src[1][i];
+        }
+    } else if (channels == 1 && len < INT_MAX / sizeof(float)) { /* mono: nothing to interleave */
+        memcpy(dst, src[0], len * sizeof(float)); /* the guard above keeps this byte count below INT_MAX */
+    } else { /* generic path; also taken for mono when len fails the guard above */
+        for (c = 0; c < channels; c++)
+            for (i = 0, j = c; i < len; i++, j += channels) /* j walks dst[c], dst[c+channels], ... */
+                dst[j] = src[c][i];
+    }
+}
+
 av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
 {
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
     c->float_to_int16             = float_to_int16_c;
     c->float_to_int16_interleave  = float_to_int16_interleave_c;
+    c->float_interleave           = ff_float_interleave_c;
 
     if (ARCH_ARM) ff_fmt_convert_init_arm(c, avctx);
     if (HAVE_ALTIVEC) ff_fmt_convert_init_altivec(c, avctx);
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index e0afee47e1..d7741135b7 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -68,8 +68,17 @@ typedef struct FmtConvertContext {
      */
     void (*float_to_int16_interleave)(int16_t *dst, const float **src,
                                       long len, int channels);
+
+    /**
+     * Convert multiple arrays of float to an array of interleaved float.
+     */
+    void (*float_interleave)(float *dst, const float **src, unsigned int len,
+                             int channels);
 } FmtConvertContext;
 
+void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
+                           int channels);
+
 void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx);
 
 void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 5cd8f6c596..e023b48322 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -20,6 +20,7 @@
 ;******************************************************************************
 
 %include "x86inc.asm"
+%include "x86util.asm"
 
 section .text align=16
 
@@ -89,3 +90,143 @@ FLOAT_TO_INT16_INTERLEAVE6 3dnow
 %undef pswapd
 FLOAT_TO_INT16_INTERLEAVE6 3dn2
 %undef cvtps2pi
+
+;-----------------------------------------------------------------------------
+; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
+;-----------------------------------------------------------------------------
+
+%macro BUTTERFLYPS 3 ; args: two input register numbers and one scratch register number
+    movaps    m%3, m%1 ; copy the first input so its high half survives the next instruction
+    unpcklps  m%1, m%2 ; first register = low halves of both inputs, interleaved
+    unpckhps  m%3, m%2 ; scratch register = high halves of both inputs, interleaved
+    SWAP %2, %3 ; NOTE(review): presumably renames registers so the high-half result is addressed via the second argument - confirm against x86inc SWAP
+%endmacro
+
+%macro FLOAT_INTERLEAVE6 2 ; args: cpu suffix (mmx or sse), register count forwarded to cglobal
+cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5
+%ifdef ARCH_X86_64
+    %define lend r10d
+    mov     lend, r2d ; NOTE(review): presumably saves len because r2 is reused as src1 below - confirm against the x86inc register mapping
+%else
+    %define lend dword r2m
+%endif
+    mov    src1q, [srcq+1*gprsize] ; load the channel pointers src[1]..src[5]
+    mov    src2q, [srcq+2*gprsize]
+    mov    src3q, [srcq+3*gprsize]
+    mov    src4q, [srcq+4*gprsize]
+    mov    src5q, [srcq+5*gprsize]
+    mov     srcq, [srcq] ; srcq now holds the channel 0 data pointer
+    sub    src1q, srcq ; turn the other channel pointers into byte offsets from channel 0
+    sub    src2q, srcq
+    sub    src3q, srcq
+    sub    src4q, srcq
+    sub    src5q, srcq
+.loop:
+%ifidn %1, sse
+    movaps    m0, [srcq] ; one vector of samples from each of the six channels
+    movaps    m1, [srcq+src1q]
+    movaps    m2, [srcq+src2q]
+    movaps    m3, [srcq+src3q]
+    movaps    m4, [srcq+src4q]
+    movaps    m5, [srcq+src5q]
+
+    BUTTERFLYPS 0, 1, 6
+    BUTTERFLYPS 2, 3, 6
+    BUTTERFLYPS 4, 5, 6
+
+    movaps    m6, m4
+    shufps    m4, m0, 0xe4
+    movlhps   m0, m2
+    movhlps   m6, m2
+    movaps [dstq   ], m0 ; first 48 of the 96 output bytes written per pass
+    movaps [dstq+16], m4
+    movaps [dstq+32], m6
+
+    movaps    m6, m5
+    shufps    m5, m1, 0xe4
+    movlhps   m1, m3
+    movhlps   m6, m3
+    movaps [dstq+48], m1 ; remaining 48 output bytes of this pass
+    movaps [dstq+64], m5
+    movaps [dstq+80], m6
+%else ; mmx
+    movq       m0, [srcq] ; 8 bytes from each of the six channels
+    movq       m1, [srcq+src1q]
+    movq       m2, [srcq+src2q]
+    movq       m3, [srcq+src3q]
+    movq       m4, [srcq+src4q]
+    movq       m5, [srcq+src5q]
+
+    SBUTTERFLY dq, 0, 1, 6
+    SBUTTERFLY dq, 2, 3, 6
+    SBUTTERFLY dq, 4, 5, 6
+    movq [dstq   ], m0 ; 48 output bytes per pass
+    movq [dstq+ 8], m2
+    movq [dstq+16], m4
+    movq [dstq+24], m1
+    movq [dstq+32], m3
+    movq [dstq+40], m5
+%endif
+    add      srcq, mmsize ; the other channels follow, since they are offsets from srcq
+    add      dstq, mmsize*6
+    sub      lend, mmsize/4 ; mmsize/4 samples per channel are consumed per pass
+    jg .loop ; the body always runs once and repeats while the remaining count is positive
+%ifidn %1, mmx
+    emms
+%endif
+    REP_RET
+%endmacro
+
+INIT_MMX
+FLOAT_INTERLEAVE6 mmx, 0
+INIT_XMM
+FLOAT_INTERLEAVE6 sse, 7
+
+;-----------------------------------------------------------------------------
+; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
+;-----------------------------------------------------------------------------
+
+%macro FLOAT_INTERLEAVE2 2 ; args: cpu suffix (mmx or sse), register count forwarded to cglobal
+cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1
+    mov     src1q, [srcq+gprsize] ; channel 1 data pointer
+    mov      srcq, [srcq        ] ; channel 0 data pointer
+    sub     src1q, srcq ; keep channel 1 as a byte offset from channel 0
+.loop:
+    MOVPS      m0, [srcq             ] ; two vectors from each channel per pass
+    MOVPS      m1, [srcq+src1q       ]
+    MOVPS      m3, [srcq      +mmsize]
+    MOVPS      m4, [srcq+src1q+mmsize]
+
+    MOVPS      m2, m0 ; copy, because the low unpack overwrites m0
+    PUNPCKLDQ  m0, m1 ; low halves of both channels, interleaved
+    PUNPCKHDQ  m2, m1 ; high halves of both channels, interleaved
+
+    MOVPS      m1, m3 ; same again for the second pair of vectors
+    PUNPCKLDQ  m3, m4
+    PUNPCKHDQ  m1, m4
+
+    MOVPS [dstq         ], m0
+    MOVPS [dstq+1*mmsize], m2
+    MOVPS [dstq+2*mmsize], m3
+    MOVPS [dstq+3*mmsize], m1
+
+    add      srcq, mmsize*2
+    add      dstq, mmsize*4
+    sub      lend, mmsize/2 ; mmsize/2 samples per channel are consumed per pass
+    jg .loop ; the body always runs once and repeats while the remaining count is positive
+%ifidn %1, mmx
+    emms
+%endif
+    REP_RET
+%endmacro
+
+INIT_MMX
+%define MOVPS     movq
+%define PUNPCKLDQ punpckldq
+%define PUNPCKHDQ punpckhdq
+FLOAT_INTERLEAVE2 mmx, 0
+INIT_XMM
+%define MOVPS     movaps
+%define PUNPCKLDQ unpcklps
+%define PUNPCKHDQ unpckhps
+FLOAT_INTERLEAVE2 sse, 5
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index 847bd80fcd..61a4272a69 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -235,11 +235,40 @@ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long
         float_to_int16_interleave_3dnow(dst, src, len, channels);
 }
 
+void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
+void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
+
+void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len);
+void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len);
+
+static void float_interleave_mmx(float *dst, const float **src,
+                                 unsigned int len, int channels) /* MMX version of float_interleave */
+{ /* NOTE(review): the asm loops always run once and round len up to a whole pass - confirm callers pad buffers */
+    if (channels == 2) {
+        ff_float_interleave2_mmx(dst, src, len); /* asm handles 4 samples per channel per pass */
+    } else if (channels == 6)
+        ff_float_interleave6_mmx(dst, src, len); /* asm handles 2 samples per channel per pass */
+    else
+        ff_float_interleave_c(dst, src, len, channels); /* other channel counts use the C version */
+}
+
+static void float_interleave_sse(float *dst, const float **src,
+                                 unsigned int len, int channels) /* SSE version of float_interleave */
+{ /* NOTE(review): the asm uses movaps (16-byte aligned dst and src[c]) and rounds len up to a whole pass - confirm callers */
+    if (channels == 2) {
+        ff_float_interleave2_sse(dst, src, len); /* asm handles 8 samples per channel per pass */
+    } else if (channels == 6)
+        ff_float_interleave6_sse(dst, src, len); /* asm handles 4 samples per channel per pass */
+    else
+        ff_float_interleave_c(dst, src, len, channels); /* other channel counts use the C version */
+}
+
 void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
 {
     int mm_flags = av_get_cpu_flags();
 
     if (mm_flags & AV_CPU_FLAG_MMX) {
+        c->float_interleave = float_interleave_mmx;
 
         if(mm_flags & AV_CPU_FLAG_3DNOW){
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
@@ -256,6 +285,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
             c->float_to_int16 = float_to_int16_sse;
             c->float_to_int16_interleave = float_to_int16_interleave_sse;
+            c->float_interleave = float_interleave_sse;
         }
         if(mm_flags & AV_CPU_FLAG_SSE2){
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;

From 64150ff014708b4b00cb5d76237e9e908ac0fbfc Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 19 Apr 2011 19:47:01 -0400
Subject: [PATCH 091/830] Add request_sample_fmt field to AVCodecContext.

This will allow audio decoders to support output of different sample formats
as a runtime option.
---
 libavcodec/avcodec.h | 7 +++++++
 libavcodec/options.c | 6 ++++++
 libavcodec/version.h | 2 +-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 2eb218ba4f..e067ee0273 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2836,6 +2836,13 @@ typedef struct AVCodecContext {
      * - decoding: Set by libavcodec.
      */
     enum AVAudioServiceType audio_service_type;
+
+    /**
+     * Used to request a sample format from the decoder.
+     * - encoding: unused.
+     * - decoding: Set by user.
+     */
+    enum AVSampleFormat request_sample_fmt;
 } AVCodecContext;
 
 /**
diff --git a/libavcodec/options.c b/libavcodec/options.c
index 9c714fb73e..8f9aec4ac2 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -441,6 +441,12 @@ static const AVOption options[]={
 {"em", "Emergency",          0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_EMERGENCY },         INT_MIN, INT_MAX, A|E, "audio_service_type"},
 {"vo", "Voice Over",         0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_VOICE_OVER },        INT_MIN, INT_MAX, A|E, "audio_service_type"},
 {"ka", "Karaoke",            0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_KARAOKE },           INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"request_sample_fmt", NULL, OFFSET(request_sample_fmt), FF_OPT_TYPE_INT, {.dbl = AV_SAMPLE_FMT_NONE }, AV_SAMPLE_FMT_NONE, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"},
+{"u8" , "8-bit unsigned integer", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_U8  }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
+{"s16", "16-bit signed integer",  0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_S16 }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
+{"s32", "32-bit signed integer",  0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_S32 }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
+{"flt", "32-bit float",           0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_FLT }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
+{"dbl", "64-bit double",          0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_DBL }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
 {NULL},
 };
 
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 1b454b8bd6..0def0d1ff9 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -21,7 +21,7 @@
 #define AVCODEC_VERSION_H
 
 #define LIBAVCODEC_VERSION_MAJOR 53
-#define LIBAVCODEC_VERSION_MINOR  3
+#define LIBAVCODEC_VERSION_MINOR  4
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \

From 00e5da893dc8f67cbc9ed940c4da9423a01e4f8c Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Thu, 21 Apr 2011 10:48:05 -0400
Subject: [PATCH 092/830] Add APIchanges entry for request_sample_fmt.

---
 doc/APIchanges | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/APIchanges b/doc/APIchanges
index ee96ddf258..faa1698684 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-xx-xx - xxxxxxx - lavc 53.4.0 - AVCodecContext.request_sample_fmt
+  Add request_sample_fmt field to AVCodecContext.
+
 2011-05-10 - 188dea1 - lavc 53.3.0 - avcodec.h
   Deprecate AVLPCType and the following fields in
   AVCodecContext: lpc_coeff_precision, prediction_order_method,

From bc778a0cea3027941afa1ff6bbb424b3159a0b27 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Sun, 24 Apr 2011 16:48:42 -0400
Subject: [PATCH 093/830] Add support for request_sample_format in ffmpeg and
 ffplay.

---
 ffmpeg.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 ffplay.c | 17 +++++++++++++
 2 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index c99c4de024..71be22e05e 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -546,6 +546,46 @@ static void choose_sample_fmt(AVStream *st, AVCodec *codec)
     }
 }
 
+/**
+ * Update the requested input sample format based on the output sample format.
+ * This is currently only used to request float output from decoders which
+ * support multiple sample formats, one of which is AV_SAMPLE_FMT_FLT.
+ * Ideally this will be removed in the future when decoders do not do format
+ * conversion and only output in their native format.
+ */
+static void update_sample_fmt(AVCodecContext *dec, AVCodec *dec_codec,
+                              AVCodecContext *enc)
+{
+    /* return if formats match or a decoder format was already requested */
+    if (enc->sample_fmt == dec->sample_fmt ||
+        dec->request_sample_fmt > AV_SAMPLE_FMT_NONE)
+        return;
+
+    /* only decoders that list at least two output formats offer a choice */
+    if (dec_codec && dec_codec->sample_fmts &&
+        dec_codec->sample_fmts[0] != AV_SAMPLE_FMT_NONE &&
+        dec_codec->sample_fmts[1] != AV_SAMPLE_FMT_NONE) {
+        const enum AVSampleFormat *p;
+        int min_dec = -1, min_inc = -1; /* -1: no candidate on that side yet */
+
+        /* find the encoder format, or the nearest formats above and below it */
+        for (p = dec_codec->sample_fmts; *p != AV_SAMPLE_FMT_NONE; p++) {
+            int diff = *p - enc->sample_fmt;
+            if (!diff) {
+                dec->request_sample_fmt = *p;
+                return;
+            } else if (diff > 0) {
+                min_inc = min_inc < 0 ? diff : FFMIN(min_inc, diff);
+            } else
+                min_dec = min_dec < 0 ? -diff : FFMIN(min_dec, -diff);
+        }
+
+        /* no match: use the nearest format above, else the nearest one below */
+        dec->request_sample_fmt = min_inc > 0 ? enc->sample_fmt + min_inc :
+                                  enc->sample_fmt - min_dec;
+    }
+}
+
 static void choose_sample_rate(AVStream *st, AVCodec *codec)
 {
     if(codec && codec->supported_samplerates){
@@ -751,7 +791,7 @@ need_realloc:
         ffmpeg_exit(1);
     }
 
-    if (enc->channels != dec->channels)
+    if (enc->channels != dec->channels || enc->sample_rate != dec->sample_rate)
         ost->audio_resample = 1;
 
     resample_changed = ost->resample_sample_fmt  != dec->sample_fmt ||
@@ -777,7 +817,7 @@ need_realloc:
             ost->resample_sample_rate == enc->sample_rate) {
             ost->resample = NULL;
             ost->audio_resample = 0;
-        } else {
+        } else if (ost->audio_resample) {
             if (dec->sample_fmt != AV_SAMPLE_FMT_S16)
                 fprintf(stderr, "Warning, using s16 intermediate sample format for resampling\n");
             ost->resample = av_audio_resample_init(enc->channels,    dec->channels,
@@ -2308,6 +2348,17 @@ static int transcode(AVFormatContext **output_files,
                 ret = AVERROR(EINVAL);
                 goto dump_format;
             }
+
+            /* update requested sample format for the decoder based on the
+               corresponding encoder sample format */
+            for (j = 0; j < nb_ostreams; j++) {
+                ost = ost_table[j];
+                if (ost->source_index == i) {
+                    update_sample_fmt(ist->st->codec, codec, ost->st->codec);
+                    break;
+                }
+            }
+
             if (avcodec_open(ist->st->codec, codec) < 0) {
                 snprintf(error, sizeof(error), "Error while opening decoder for input stream #%d.%d",
                         ist->file_index, ist->index);
@@ -3179,6 +3230,23 @@ static void opt_input_file(const char *filename)
 
     ic->loop_input = loop_input;
 
+    /* Set AVCodecContext options so they will be seen by av_find_stream_info() */
+    for (i = 0; i < ic->nb_streams; i++) {
+        AVCodecContext *dec = ic->streams[i]->codec;
+        switch (dec->codec_type) {
+        case AVMEDIA_TYPE_AUDIO:
+            set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_AUDIO],
+                             AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM,
+                             NULL);
+            break;
+        case AVMEDIA_TYPE_VIDEO:
+            set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_VIDEO],
+                             AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM,
+                             NULL);
+            break;
+        }
+    }
+
     /* If not enough info to get the stream parameters, we decode the
        first frames to get it. (used in mpeg case for example) */
     ret = av_find_stream_info(ic);
diff --git a/ffplay.c b/ffplay.c
index a5dc358f72..c3dc970a1e 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -2412,6 +2412,23 @@ static int decode_thread(void *arg)
     if(genpts)
         ic->flags |= AVFMT_FLAG_GENPTS;
 
+    /* Set AVCodecContext options so they will be seen by av_find_stream_info() */
+    for (i = 0; i < ic->nb_streams; i++) {
+        AVCodecContext *dec = ic->streams[i]->codec;
+        switch (dec->codec_type) {
+        case AVMEDIA_TYPE_AUDIO:
+            set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_AUDIO],
+                             AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM,
+                             NULL);
+            break;
+        case AVMEDIA_TYPE_VIDEO:
+            set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_VIDEO],
+                             AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM,
+                             NULL);
+            break;
+        }
+    }
+
     err = av_find_stream_info(ic);
     if (err < 0) {
         fprintf(stderr, "%s: could not find codec parameters\n", is->filename);

From 9aa8193a234ccb6a79cba5cc550531f62ffb0a17 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Fri, 22 Apr 2011 21:30:19 -0400
Subject: [PATCH 094/830] Add floating-point sample format support to the ac3,
 eac3, dca, aac, and vorbis decoders.

Based on patches by clsid2 in ffdshow-tryout.
---
 libavcodec/aacdec.c    | 34 ++++++++++++++++++++++++----------
 libavcodec/aacsbr.c    | 11 ++++++++---
 libavcodec/aacsbr.h    |  2 +-
 libavcodec/ac3dec.c    | 32 ++++++++++++++++++++++++++------
 libavcodec/dca.c       | 34 +++++++++++++++++++++++++++-------
 libavcodec/vorbisdec.c | 23 ++++++++++++++++++-----
 6 files changed, 104 insertions(+), 32 deletions(-)

diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 5f9dd834a0..f2d50f4aba 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -186,7 +186,7 @@ static av_cold int che_configure(AACContext *ac,
     if (che_pos[type][id]) {
         if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
             return AVERROR(ENOMEM);
-        ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
+        ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
         if (type != TYPE_CCE) {
             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
             if (type == TYPE_CPE ||
@@ -546,6 +546,7 @@ static void reset_predictor_group(PredictorState *ps, int group_num)
 static av_cold int aac_decode_init(AVCodecContext *avctx)
 {
     AACContext *ac = avctx->priv_data;
+    float output_scale_factor;
 
     ac->avctx = avctx;
     ac->m4ac.sample_rate = avctx->sample_rate;
@@ -557,7 +558,13 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
             return -1;
     }
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+        output_scale_factor = 1.0 / 32768.0;
+    } else {
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+        output_scale_factor = 1.0;
+    }
 
     AAC_INIT_VLC_STATIC( 0, 304);
     AAC_INIT_VLC_STATIC( 1, 270);
@@ -585,9 +592,9 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                     352);
 
-    ff_mdct_init(&ac->mdct,       11, 1, 1.0/1024.0);
-    ff_mdct_init(&ac->mdct_small,  8, 1, 1.0/128.0);
-    ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0);
+    ff_mdct_init(&ac->mdct,       11, 1, output_scale_factor/1024.0);
+    ff_mdct_init(&ac->mdct_small,  8, 1, output_scale_factor/128.0);
+    ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0/output_scale_factor);
     // window initialization
     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
@@ -2169,7 +2176,8 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
         avctx->frame_size = samples;
     }
 
-    data_size_tmp = samples * avctx->channels * sizeof(int16_t);
+    data_size_tmp = samples * avctx->channels *
+                    (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
     if (*data_size < data_size_tmp) {
         av_log(avctx, AV_LOG_ERROR,
                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
@@ -2178,8 +2186,14 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
     }
     *data_size = data_size_tmp;
 
-    if (samples)
-        ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
+    if (samples) {
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
+            ac->fmt_conv.float_interleave(data, (const float **)ac->output_data,
+                                          samples, avctx->channels);
+        else
+            ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data,
+                                                   samples, avctx->channels);
+    }
 
     if (ac->output_configured)
         ac->output_configured = OC_LOCKED;
@@ -2497,7 +2511,7 @@ AVCodec ff_aac_decoder = {
     aac_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
     .sample_fmts = (const enum AVSampleFormat[]) {
-        AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
     },
     .channel_layouts = aac_channel_layout,
 };
@@ -2517,7 +2531,7 @@ AVCodec ff_aac_latm_decoder = {
     .decode = latm_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"),
     .sample_fmts = (const enum AVSampleFormat[]) {
-        AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
     },
     .channel_layouts = aac_channel_layout,
 };
diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
index 7a217abfda..81b0b4c001 100644
--- a/libavcodec/aacsbr.c
+++ b/libavcodec/aacsbr.c
@@ -126,14 +126,19 @@ av_cold void ff_aac_sbr_init(void)
     ff_ps_init();
 }
 
-av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr)
+av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr)
 {
+    float mdct_scale;
     sbr->kx[0] = sbr->kx[1] = 32; //Typo in spec, kx' inits to 32
     sbr->data[0].e_a[1] = sbr->data[1].e_a[1] = -1;
     sbr->data[0].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
     sbr->data[1].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
-    ff_mdct_init(&sbr->mdct, 7, 1, 1.0/64);
-    ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0);
+    /* SBR requires samples to be scaled to +/-32768.0 to work correctly.
+     * mdct scale factors are adjusted to scale up from +/-1.0 at analysis
+     * and scale back down at synthesis. */
+    mdct_scale = ac->avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? 32768.0f : 1.0f;
+    ff_mdct_init(&sbr->mdct,     7, 1, 1.0 / (64 * mdct_scale));
+    ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0 * mdct_scale);
     ff_ps_ctx_init(&sbr->ps);
 }
 
diff --git a/libavcodec/aacsbr.h b/libavcodec/aacsbr.h
index dca83305ad..153070d3f2 100644
--- a/libavcodec/aacsbr.h
+++ b/libavcodec/aacsbr.h
@@ -36,7 +36,7 @@
 /** Initialize SBR. */
 av_cold void ff_aac_sbr_init(void);
 /** Initialize one SBR context. */
-av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr);
+av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr);
 /** Close one SBR context. */
 av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr);
 /** Decode one SBR element. */
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 015ebaebec..2966c33b25 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -189,7 +189,13 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
     av_lfg_init(&s->dith_state, 0);
 
     /* set scale value for float to int16 conversion */
-    s->mul_bias = 32767.0f;
+    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        s->mul_bias = 1.0f;
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+    } else {
+        s->mul_bias = 32767.0f;
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    }
 
     /* allow downmixing to stereo or mono */
     if (avctx->channels > 0 && avctx->request_channels > 0 &&
@@ -204,7 +210,6 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
         if (!s->input_buffer)
             return AVERROR(ENOMEM);
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
     return 0;
 }
 
@@ -1299,7 +1304,8 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     AC3DecodeContext *s = avctx->priv_data;
-    int16_t *out_samples = (int16_t *)data;
+    float   *out_samples_flt = data;
+    int16_t *out_samples_s16 = data;
     int blk, ch, err;
     const uint8_t *channel_map;
     const float *output[AC3_MAX_CHANNELS];
@@ -1405,10 +1411,18 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
             av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
             err = 1;
         }
-        s->fmt_conv.float_to_int16_interleave(out_samples, output, 256, s->out_channels);
-        out_samples += 256 * s->out_channels;
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
+            s->fmt_conv.float_interleave(out_samples_flt, output, 256,
+                                         s->out_channels);
+            out_samples_flt += 256 * s->out_channels;
+        } else {
+            s->fmt_conv.float_to_int16_interleave(out_samples_s16, output, 256,
+                                                  s->out_channels);
+            out_samples_s16 += 256 * s->out_channels;
+        }
     }
-    *data_size = s->num_blocks * 256 * avctx->channels * sizeof (int16_t);
+    *data_size = s->num_blocks * 256 * avctx->channels *
+                 (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
     return FFMIN(buf_size, s->frame_size);
 }
 
@@ -1435,6 +1449,9 @@ AVCodec ff_ac3_decoder = {
     .close = ac3_decode_end,
     .decode = ac3_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+    },
 };
 
 #if CONFIG_EAC3_DECODER
@@ -1447,5 +1464,8 @@ AVCodec ff_eac3_decoder = {
     .close = ac3_decode_end,
     .decode = ac3_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"),
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+    },
 };
 #endif
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index f1cd64e9ed..dbadebacff 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -1626,7 +1626,9 @@ static int dca_decode_frame(AVCodecContext * avctx,
     int lfe_samples;
     int num_core_channels = 0;
     int i;
-    int16_t *samples = data;
+    float   *samples_flt = data;
+    int16_t *samples_s16 = data;
+    int out_size;
     DCAContext *s = avctx->priv_data;
     int channels;
     int core_ss_end;
@@ -1812,9 +1814,11 @@ static int dca_decode_frame(AVCodecContext * avctx,
         return -1;
     }
 
-    if (*data_size < (s->sample_blocks / 8) * 256 * sizeof(int16_t) * channels)
+    out_size = 256 / 8 * s->sample_blocks * channels *
+               (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
+    if (*data_size < out_size)
         return -1;
-    *data_size = 256 / 8 * s->sample_blocks * sizeof(int16_t) * channels;
+    *data_size = out_size;
 
     /* filter to get final output */
     for (i = 0; i < (s->sample_blocks / 8); i++) {
@@ -1833,8 +1837,16 @@ static int dca_decode_frame(AVCodecContext * avctx,
             }
         }
 
-        s->fmt_conv.float_to_int16_interleave(samples, s->samples_chanptr, 256, channels);
-        samples += 256 * channels;
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
+            s->fmt_conv.float_interleave(samples_flt, s->samples_chanptr, 256,
+                                         channels);
+            samples_flt += 256 * channels;
+        } else {
+            s->fmt_conv.float_to_int16_interleave(samples_s16,
+                                                  s->samples_chanptr, 256,
+                                                  channels);
+            samples_s16 += 256 * channels;
+        }
     }
 
     /* update lfe history */
@@ -1870,9 +1882,14 @@ static av_cold int dca_decode_init(AVCodecContext * avctx)
 
     for (i = 0; i < DCA_PRIM_CHANNELS_MAX+1; i++)
         s->samples_chanptr[i] = s->samples + i * 256;
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 
-    s->scale_bias = 1.0;
+    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+        s->scale_bias = 1.0 / 32768.0;
+    } else {
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+        s->scale_bias = 1.0;
+    }
 
     /* allow downmixing to stereo */
     if (avctx->channels > 0 && avctx->request_channels < avctx->channels &&
@@ -1909,5 +1926,8 @@ AVCodec ff_dca_decoder = {
     .close = dca_decode_end,
     .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
     .capabilities = CODEC_CAP_CHANNEL_CONF,
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+    },
     .profiles = NULL_IF_CONFIG_SMALL(profiles),
 };
diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index 7443e98a64..f6ec74f4a1 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -979,7 +979,13 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext)
     dsputil_init(&vc->dsp, avccontext);
     ff_fmt_convert_init(&vc->fmt_conv, avccontext);
 
-    vc->scale_bias = 32768.0f;
+    if (avccontext->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        avccontext->sample_fmt = AV_SAMPLE_FMT_FLT;
+        vc->scale_bias = 1.0f;
+    } else {
+        avccontext->sample_fmt = AV_SAMPLE_FMT_S16;
+        vc->scale_bias = 32768.0f;
+    }
 
     if (!headers_len) {
         av_log(avccontext, AV_LOG_ERROR, "Extradata missing.\n");
@@ -1024,7 +1030,6 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext)
     avccontext->channels    = vc->audio_channels;
     avccontext->sample_rate = vc->audio_samplerate;
     avccontext->frame_size  = FFMIN(vc->blocksize[0], vc->blocksize[1]) >> 2;
-    avccontext->sample_fmt  = AV_SAMPLE_FMT_S16;
 
     return 0 ;
 }
@@ -1634,9 +1639,14 @@ static int vorbis_decode_frame(AVCodecContext *avccontext,
                               len * ff_vorbis_channel_layout_offsets[vc->audio_channels - 1][i];
     }
 
-    vc->fmt_conv.float_to_int16_interleave(data, channel_ptrs, len,
-                                           vc->audio_channels);
-    *data_size = len * 2 * vc->audio_channels;
+    if (avccontext->sample_fmt == AV_SAMPLE_FMT_FLT)
+        vc->fmt_conv.float_interleave(data, channel_ptrs, len, vc->audio_channels);
+    else
+        vc->fmt_conv.float_to_int16_interleave(data, channel_ptrs, len,
+                                               vc->audio_channels);
+
+    *data_size = len * vc->audio_channels *
+                 (av_get_bits_per_sample_fmt(avccontext->sample_fmt) / 8);
 
     return buf_size ;
 }
@@ -1663,5 +1673,8 @@ AVCodec ff_vorbis_decoder = {
     vorbis_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("Vorbis"),
     .channel_layouts = ff_vorbis_channel_layouts,
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+    },
 };
 

From 41e21e4db623ebd77f431a6f30cf21d62d9e1f33 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 18 May 2011 17:52:31 -0400
Subject: [PATCH 095/830] APIchanges: fill in date and commit for
 request_sample_fmt

---
 doc/APIchanges | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index faa1698684..853975e76d 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,7 +13,7 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
-2011-xx-xx - xxxxxxx - lavc 53.4.0 - AVCodecContext.request_sample_fmt
+2011-05-18 - 64150ff - lavc 53.4.0 - AVCodecContext.request_sample_fmt
   Add request_sample_fmt field to AVCodecContext.
 
 2011-05-10 - 188dea1 - lavc 53.3.0 - avcodec.h

From 155d1d189bf240764a18a6923739e84d09ac5d08 Mon Sep 17 00:00:00 2001
From: Etienne Buira <etienne.buira.lists@free.fr>
Date: Wed, 18 May 2011 18:54:42 +0200
Subject: [PATCH 096/830] Add a short description for -rc_override

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 doc/ffmpeg.texi | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 70ad6c5551..7f2230d46f 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -409,7 +409,10 @@ and the following constants are available:
 @end table
 
 @item -rc_override @var{override}
-rate control override for specific intervals
+Rate control override for specific intervals, formated as "int,int,int"
+list separated with slashes. Two first values are the beginning and
+end frame numbers, last one is quantizer to use if positive, or quality
+factor if negative.
 @item -me_method @var{method}
 Set motion estimation method to @var{method}.
 Available methods are (from lowest to best quality):

From e98a95e7792f9452bc790f2552d9ea24d0f7818e Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 18 May 2011 20:18:08 -0400
Subject: [PATCH 097/830] 10l: wrap float_interleave functions in HAVE_YASM.

fixes compilation with --disable-yasm
---
 libavcodec/x86/fmtconvert_mmx.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index 61a4272a69..253f60bfc2 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -235,6 +235,7 @@ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long
         float_to_int16_interleave_3dnow(dst, src, len, channels);
 }
 
+#if HAVE_YASM
 void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
 void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
 
@@ -262,13 +263,16 @@ static void float_interleave_sse(float *dst, const float **src,
     else
         ff_float_interleave_c(dst, src, len, channels);
 }
+#endif
 
 void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
 {
     int mm_flags = av_get_cpu_flags();
 
     if (mm_flags & AV_CPU_FLAG_MMX) {
+#if HAVE_YASM
         c->float_interleave = float_interleave_mmx;
+#endif
 
         if(mm_flags & AV_CPU_FLAG_3DNOW){
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
@@ -285,7 +289,9 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
             c->float_to_int16 = float_to_int16_sse;
             c->float_to_int16_interleave = float_to_int16_interleave_sse;
+#if HAVE_YASM
             c->float_interleave = float_interleave_sse;
+#endif
         }
         if(mm_flags & AV_CPU_FLAG_SSE2){
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;

From 8529f9b36b7c1b8f2cb36ba2709983517c4b6458 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 18 May 2011 21:39:22 +0200
Subject: [PATCH 098/830] av_find_stream_info: Print more details about max
 analyze duration failures.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/utils.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index 70429a7650..3509620c20 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2327,8 +2327,9 @@ int av_find_stream_info(AVFormatContext *ic)
 
         st = ic->streams[pkt->stream_index];
         if (st->codec_info_nb_frames>1) {
-            if (st->time_base.den > 0 && av_rescale_q(st->info->codec_info_duration, st->time_base, AV_TIME_BASE_Q) >= ic->max_analyze_duration) {
-                av_log(ic, AV_LOG_WARNING, "max_analyze_duration reached\n");
+            int64_t t;
+            if (st->time_base.den > 0 && (t=av_rescale_q(st->info->codec_info_duration, st->time_base, AV_TIME_BASE_Q)) >= ic->max_analyze_duration) {
+                av_log(ic, AV_LOG_WARNING, "max_analyze_duration %d reached at %Ld\n", ic->max_analyze_duration, t);
                 break;
             }
             st->info->codec_info_duration += pkt->duration;

From a2077b85da5bf71167b11925aabbb738558a134d Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 17 May 2011 15:29:58 +0200
Subject: [PATCH 099/830] g729: Merge g729.h into g729dec.c.

The header contains just a single define that is only used in g729dec.c.
---
 libavcodec/g729.h    | 29 -----------------------------
 libavcodec/g729dec.c |  7 ++++++-
 2 files changed, 6 insertions(+), 30 deletions(-)
 delete mode 100644 libavcodec/g729.h

diff --git a/libavcodec/g729.h b/libavcodec/g729.h
deleted file mode 100644
index 9cae471bc2..0000000000
--- a/libavcodec/g729.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * G.729 decoder
- * Copyright (c) 2008 Vladimir Voroshilov
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef AVCODEC_G729_H
-#define AVCODEC_G729_H
-
-/**
- * subframe size
- */
-#define SUBFRAME_SIZE 40
-
-#endif // AVCODEC_G729_H
diff --git a/libavcodec/g729dec.c b/libavcodec/g729dec.c
index 5763108d71..979b87fd27 100644
--- a/libavcodec/g729dec.c
+++ b/libavcodec/g729dec.c
@@ -30,7 +30,6 @@
 #include "libavutil/avutil.h"
 #include "get_bits.h"
 
-#include "g729.h"
 #include "lsp.h"
 #include "celp_math.h"
 #include "acelp_filters.h"
@@ -71,6 +70,12 @@
  */
 #define SHARP_MAX                  13017
 
+/**
+ * subframe size
+ */
+#define SUBFRAME_SIZE              40
+
+
 typedef struct {
     uint8_t ac_index_bits[2];   ///< adaptive codebook index for second subframe (size in bits)
     uint8_t parity_bit;         ///< parity bit for pitch delay

From ea91e77127229015d23a046f1797d3fc6a33e54d Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 15 May 2011 17:09:25 +0200
Subject: [PATCH 100/830] Clean up #includes in cmdutils.h.

inttypes.h is not necessary, just stdint.h is enough.
Unconditionally #include avfilter.h in cmdutils.h.  It is an installed
header with no non-standard external dependencies, so it is safe.
---
 cmdutils.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/cmdutils.h b/cmdutils.h
index bb8d779a75..c99c8653fa 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -22,8 +22,10 @@
 #ifndef FFMPEG_CMDUTILS_H
 #define FFMPEG_CMDUTILS_H
 
-#include <inttypes.h>
+#include <stdint.h>
+
 #include "libavcodec/avcodec.h"
+#include "libavfilter/avfilter.h"
 #include "libavformat/avformat.h"
 #include "libswscale/swscale.h"
 
@@ -278,9 +280,6 @@ int64_t guess_correct_pts(PtsCorrectionContext *ctx, int64_t pts, int64_t dts);
 FILE *get_preset_file(char *filename, size_t filename_size,
                       const char *preset_name, int is_path, const char *codec_name);
 
-#if CONFIG_AVFILTER
-#include "libavfilter/avfilter.h"
-
 typedef struct {
     enum PixelFormat pix_fmt;
 } FFSinkContext;
@@ -296,6 +295,4 @@ extern AVFilter ffsink;
 int get_filtered_video_frame(AVFilterContext *sink, AVFrame *frame,
                              AVFilterBufferRef **picref, AVRational *pts_tb);
 
-#endif /* CONFIG_AVFILTER */
-
 #endif /* FFMPEG_CMDUTILS_H */

From c4f5c2d6f4ffa3f4b56555059000208a6ba47b55 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 16:52:01 +0100
Subject: [PATCH 101/830] Move some mpegaudio functions to new mpegaudiodsp
 subsystem

This separation allows these functions to be used in a cleaner
fashion from other codecs (e.g. qdm2) and simplifies creating
optimised versions of them.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 configure                             |  19 ++-
 libavcodec/Makefile                   |   3 +
 libavcodec/mpc.c                      |   4 +-
 libavcodec/mpc.h                      |   1 +
 libavcodec/mpc7.c                     |   3 +-
 libavcodec/mpc8.c                     |   3 +-
 libavcodec/mpegaudio.h                |  24 +--
 libavcodec/mpegaudiodec.c             | 197 +------------------------
 libavcodec/mpegaudiodec_float.c       |  19 ---
 libavcodec/mpegaudiodsp.c             |  40 +++++
 libavcodec/mpegaudiodsp.h             |  63 ++++++++
 libavcodec/mpegaudiodsp_fixed.c       |  20 +++
 libavcodec/mpegaudiodsp_float.c       |  20 +++
 libavcodec/mpegaudiodsp_template.c    | 205 ++++++++++++++++++++++++++
 libavcodec/ppc/mpegaudiodec_altivec.c |   9 +-
 libavcodec/qdm2.c                     |   6 +-
 libavcodec/x86/mpegaudiodec_mmx.c     |   9 +-
 17 files changed, 390 insertions(+), 255 deletions(-)
 create mode 100644 libavcodec/mpegaudiodsp.c
 create mode 100644 libavcodec/mpegaudiodsp.h
 create mode 100644 libavcodec/mpegaudiodsp_fixed.c
 create mode 100644 libavcodec/mpegaudiodsp_float.c
 create mode 100644 libavcodec/mpegaudiodsp_template.c

diff --git a/configure b/configure
index 5b81e0b599..6becb09724 100755
--- a/configure
+++ b/configure
@@ -952,6 +952,7 @@ CONFIG_LIST="
     mdct
     memalign_hack
     mlib
+    mpegaudiodsp
     network
     nonfree
     pic
@@ -1235,6 +1236,7 @@ symver_if_any="symver_asm_label symver_gnu_asm"
 dct_select="rdft"
 mdct_select="fft"
 rdft_select="fft"
+mpegaudiodsp_select="dct"
 
 # decoders / encoders / hardware accelerators
 aac_decoder_select="mdct sinewin"
@@ -1286,11 +1288,16 @@ ljpeg_encoder_select="aandct"
 loco_decoder_select="golomb"
 mjpeg_encoder_select="aandct"
 mlp_decoder_select="mlp_parser"
-mp1float_decoder_select="dct"
-mp2float_decoder_select="dct"
-mp3adufloat_decoder_select="dct"
-mp3float_decoder_select="dct"
-mp3on4float_decoder_select="dct"
+mp1_decoder_select="mpegaudiodsp"
+mp2_decoder_select="mpegaudiodsp"
+mp3adu_decoder_select="mpegaudiodsp"
+mp3_decoder_select="mpegaudiodsp"
+mp3on4_decoder_select="mpegaudiodsp"
+mp1float_decoder_select="mpegaudiodsp"
+mp2float_decoder_select="mpegaudiodsp"
+mp3adufloat_decoder_select="mpegaudiodsp"
+mp3float_decoder_select="mpegaudiodsp"
+mp3on4float_decoder_select="mpegaudiodsp"
 mpeg1video_encoder_select="aandct"
 mpeg2video_encoder_select="aandct"
 mpeg4_decoder_select="h263_decoder mpeg4video_parser"
@@ -1315,7 +1322,7 @@ nellymoser_encoder_select="mdct sinewin"
 png_decoder_select="zlib"
 png_encoder_select="zlib"
 qcelp_decoder_select="lsp"
-qdm2_decoder_select="mdct rdft"
+qdm2_decoder_select="mdct rdft mpegaudiodsp"
 ra_144_encoder_select="lpc"
 rv10_decoder_select="h263_decoder"
 rv10_encoder_select="h263_encoder"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fa70216c9c..b26c33de63 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -40,6 +40,9 @@ OBJS-$(CONFIG_HUFFMAN)                 += huffman.o
 OBJS-$(CONFIG_LPC)                     += lpc.o
 OBJS-$(CONFIG_LSP)                     += lsp.o
 OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o
+OBJS-$(CONFIG_MPEGAUDIODSP)            += mpegaudiodsp.o                \
+                                          mpegaudiodsp_fixed.o          \
+                                          mpegaudiodsp_float.o
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
diff --git a/libavcodec/mpc.c b/libavcodec/mpc.c
index 15febefe0b..4573860525 100644
--- a/libavcodec/mpc.c
+++ b/libavcodec/mpc.c
@@ -29,6 +29,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
+#include "mpegaudiodsp.h"
 #include "mpegaudio.h"
 
 #include "mpc.h"
@@ -51,7 +52,8 @@ static void mpc_synth(MPCContext *c, int16_t *out, int channels)
     for(ch = 0;  ch < channels; ch++){
         samples_ptr = samples + ch;
         for(i = 0; i < SAMPLES_PER_BAND; i++) {
-            ff_mpa_synth_filter_fixed(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
+            ff_mpa_synth_filter_fixed(&c->mpadsp,
+                                c->synth_buf[ch], &(c->synth_buf_offset[ch]),
                                 ff_mpa_synth_window_fixed, &dither_state,
                                 samples_ptr, channels,
                                 c->sb_samples[ch][i]);
diff --git a/libavcodec/mpc.h b/libavcodec/mpc.h
index 67fc7feed0..eea4b6df36 100644
--- a/libavcodec/mpc.h
+++ b/libavcodec/mpc.h
@@ -52,6 +52,7 @@ typedef struct {
 
 typedef struct {
     DSPContext dsp;
+    MPADSPContext mpadsp;
     GetBitContext gb;
     int IS, MSS, gapless;
     int lastframelen;
diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c
index 6a4bf57043..dbfa3c8636 100644
--- a/libavcodec/mpc7.c
+++ b/libavcodec/mpc7.c
@@ -29,7 +29,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "mpegaudio.h"
+#include "mpegaudiodsp.h"
 #include "libavutil/audioconvert.h"
 
 #include "mpc.h"
@@ -68,6 +68,7 @@ static av_cold int mpc7_decode_init(AVCodecContext * avctx)
     memset(c->oldDSCF, 0, sizeof(c->oldDSCF));
     av_lfg_init(&c->rnd, 0xDEADBEEF);
     dsputil_init(&c->dsp, avctx);
+    ff_mpadsp_init(&c->mpadsp);
     c->dsp.bswap_buf((uint32_t*)buf, (const uint32_t*)avctx->extradata, 4);
     ff_mpc_init();
     init_get_bits(&gb, buf, 128);
diff --git a/libavcodec/mpc8.c b/libavcodec/mpc8.c
index 5de8c15b4c..81de9cf500 100644
--- a/libavcodec/mpc8.c
+++ b/libavcodec/mpc8.c
@@ -29,7 +29,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "mpegaudio.h"
+#include "mpegaudiodsp.h"
 #include "libavutil/audioconvert.h"
 
 #include "mpc.h"
@@ -120,6 +120,7 @@ static av_cold int mpc8_decode_init(AVCodecContext * avctx)
     memset(c->oldDSCF, 0, sizeof(c->oldDSCF));
     av_lfg_init(&c->rnd, 0xDEADBEEF);
     dsputil_init(&c->dsp, avctx);
+    ff_mpadsp_init(&c->mpadsp);
 
     ff_mpc_init();
 
diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 3422b6df68..c33960e987 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -33,7 +33,6 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "dct.h"
 
 /* max frame size, in samples */
 #define MPA_FRAME_SIZE 1152
@@ -69,7 +68,6 @@
 typedef float OUT_INT;
 #else
 typedef int16_t OUT_INT;
-#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
 #endif
 
 #if CONFIG_FLOAT
@@ -142,11 +140,7 @@ typedef struct MPADecodeContext {
     int dither_state;
     int error_recognition;
     AVCodecContext* avctx;
-#if CONFIG_FLOAT
-    DCTContext dct;
-#endif
-    void (*apply_window_mp3)(MPA_INT *synth_buf, MPA_INT *window,
-                             int *dither_state, OUT_INT *samples, int incr);
+    MPADSPContext mpadsp;
 } MPADecodeContext;
 
 /* layer 3 huffman tables */
@@ -158,22 +152,6 @@ typedef struct HuffTable {
 
 int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
 int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
-extern MPA_INT ff_mpa_synth_window_fixed[];
-void ff_mpa_synth_init_fixed(MPA_INT *window);
-void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-                         MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr,
-                         INTFLOAT sb_samples[SBLIMIT]);
-
-void ff_mpa_synth_init_float(MPA_INT *window);
-void ff_mpa_synth_filter_float(MPADecodeContext *s,
-                         MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-                         MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr,
-                         INTFLOAT sb_samples[SBLIMIT]);
-
-void ff_mpegaudiodec_init_mmx(MPADecodeContext *s);
-void ff_mpegaudiodec_init_altivec(MPADecodeContext *s);
 
 /* fast header check for resync */
 static inline int ff_mpa_check_header(uint32_t header){
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 4802a04bc8..cc193c68d0 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -29,7 +29,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "mathops.h"
-#include "dct32.h"
+#include "mpegaudiodsp.h"
 
 /*
  * TODO:
@@ -68,8 +68,6 @@
 #include "mpegaudiodectab.h"
 
 static void RENAME(compute_antialias)(MPADecodeContext *s, GranuleDef *g);
-static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
-                               int *dither_state, OUT_INT *samples, int incr);
 
 /* vlc structure for decoding layer 3 huffman tables */
 static VLC huff_vlc[16];
@@ -119,8 +117,6 @@ static const int32_t scale_factor_mult2[3][3] = {
     SCALE_GEN(4.0 / 9.0), /* 9 steps */
 };
 
-DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256];
-
 /**
  * Convert region offsets to region sizes and truncate
  * size to big_values.
@@ -259,14 +255,8 @@ static av_cold int decode_init(AVCodecContext * avctx)
     int i, j, k;
 
     s->avctx = avctx;
-    s->apply_window_mp3 = apply_window_mp3_c;
-#if HAVE_MMX && CONFIG_FLOAT
-    ff_mpegaudiodec_init_mmx(s);
-#endif
-#if CONFIG_FLOAT
-    ff_dct_init(&s->dct, 5, DCT_II);
-#endif
-    if (HAVE_ALTIVEC && CONFIG_FLOAT) ff_mpegaudiodec_init_altivec(s);
+
+    ff_mpadsp_init(&s->mpadsp);
 
     avctx->sample_fmt= OUT_FMT;
     s->error_recognition= avctx->error_recognition;
@@ -461,183 +451,6 @@ static av_cold int decode_init(AVCodecContext * avctx)
     return 0;
 }
 
-
-#if CONFIG_FLOAT
-static inline float round_sample(float *sum)
-{
-    float sum1=*sum;
-    *sum = 0;
-    return sum1;
-}
-
-/* signed 16x16 -> 32 multiply add accumulate */
-#define MACS(rt, ra, rb) rt+=(ra)*(rb)
-
-/* signed 16x16 -> 32 multiply */
-#define MULS(ra, rb) ((ra)*(rb))
-
-#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
-
-#else
-
-static inline int round_sample(int64_t *sum)
-{
-    int sum1;
-    sum1 = (int)((*sum) >> OUT_SHIFT);
-    *sum &= (1<<OUT_SHIFT)-1;
-    return av_clip_int16(sum1);
-}
-
-#   define MULS(ra, rb) MUL64(ra, rb)
-#   define MACS(rt, ra, rb) MAC64(rt, ra, rb)
-#   define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
-#endif
-
-#define SUM8(op, sum, w, p)               \
-{                                         \
-    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
-    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
-    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
-    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
-    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
-    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
-    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
-    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
-}
-
-#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
-{                                               \
-    INTFLOAT tmp;\
-    tmp = p[0 * 64];\
-    op1(sum1, (w1)[0 * 64], tmp);\
-    op2(sum2, (w2)[0 * 64], tmp);\
-    tmp = p[1 * 64];\
-    op1(sum1, (w1)[1 * 64], tmp);\
-    op2(sum2, (w2)[1 * 64], tmp);\
-    tmp = p[2 * 64];\
-    op1(sum1, (w1)[2 * 64], tmp);\
-    op2(sum2, (w2)[2 * 64], tmp);\
-    tmp = p[3 * 64];\
-    op1(sum1, (w1)[3 * 64], tmp);\
-    op2(sum2, (w2)[3 * 64], tmp);\
-    tmp = p[4 * 64];\
-    op1(sum1, (w1)[4 * 64], tmp);\
-    op2(sum2, (w2)[4 * 64], tmp);\
-    tmp = p[5 * 64];\
-    op1(sum1, (w1)[5 * 64], tmp);\
-    op2(sum2, (w2)[5 * 64], tmp);\
-    tmp = p[6 * 64];\
-    op1(sum1, (w1)[6 * 64], tmp);\
-    op2(sum2, (w2)[6 * 64], tmp);\
-    tmp = p[7 * 64];\
-    op1(sum1, (w1)[7 * 64], tmp);\
-    op2(sum2, (w2)[7 * 64], tmp);\
-}
-
-void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window)
-{
-    int i, j;
-
-    /* max = 18760, max sum over all 16 coefs : 44736 */
-    for(i=0;i<257;i++) {
-        INTFLOAT v;
-        v = ff_mpa_enwindow[i];
-#if CONFIG_FLOAT
-        v *= 1.0 / (1LL<<(16 + FRAC_BITS));
-#endif
-        window[i] = v;
-        if ((i & 63) != 0)
-            v = -v;
-        if (i != 0)
-            window[512 - i] = v;
-    }
-
-    // Needed for avoiding shuffles in ASM implementations
-    for(i=0; i < 8; i++)
-        for(j=0; j < 16; j++)
-            window[512+16*i+j] = window[64*i+32-j];
-
-    for(i=0; i < 8; i++)
-        for(j=0; j < 16; j++)
-            window[512+128+16*i+j] = window[64*i+48-j];
-}
-
-static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
-                               int *dither_state, OUT_INT *samples, int incr)
-{
-    register const MPA_INT *w, *w2, *p;
-    int j;
-    OUT_INT *samples2;
-#if CONFIG_FLOAT
-    float sum, sum2;
-#else
-    int64_t sum, sum2;
-#endif
-
-    /* copy to avoid wrap */
-    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
-
-    samples2 = samples + 31 * incr;
-    w = window;
-    w2 = window + 31;
-
-    sum = *dither_state;
-    p = synth_buf + 16;
-    SUM8(MACS, sum, w, p);
-    p = synth_buf + 48;
-    SUM8(MLSS, sum, w + 32, p);
-    *samples = round_sample(&sum);
-    samples += incr;
-    w++;
-
-    /* we calculate two samples at the same time to avoid one memory
-       access per two sample */
-    for(j=1;j<16;j++) {
-        sum2 = 0;
-        p = synth_buf + 16 + j;
-        SUM8P2(sum, MACS, sum2, MLSS, w, w2, p);
-        p = synth_buf + 48 - j;
-        SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p);
-
-        *samples = round_sample(&sum);
-        samples += incr;
-        sum += sum2;
-        *samples2 = round_sample(&sum);
-        samples2 -= incr;
-        w++;
-        w2--;
-    }
-
-    p = synth_buf + 32;
-    SUM8(MLSS, sum, w + 32, p);
-    *samples = round_sample(&sum);
-    *dither_state= sum;
-}
-
-
-/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
-   32 samples. */
-/* XXX: optimize by avoiding ring buffer usage */
-#if !CONFIG_FLOAT
-void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-                         MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr,
-                         INTFLOAT sb_samples[SBLIMIT])
-{
-    register MPA_INT *synth_buf;
-    int offset;
-
-    offset = *synth_buf_offset;
-    synth_buf = synth_buf_ptr + offset;
-
-    ff_dct32_fixed(synth_buf, sb_samples);
-    apply_window_mp3_c(synth_buf, window, dither_state, samples, incr);
-
-    offset = (offset - 32) & 511;
-    *synth_buf_offset = offset;
-}
-#endif
-
 #define C3 FIXHR(0.86602540378443864676/2)
 
 /* 0.5 / cos(pi*(2*i+1)/36) */
@@ -1915,9 +1728,7 @@ static int mp_decode_frame(MPADecodeContext *s,
         samples_ptr = samples + ch;
         for(i=0;i<nb_frames;i++) {
             RENAME(ff_mpa_synth_filter)(
-#if CONFIG_FLOAT
-                         s,
-#endif
+                         &s->mpadsp,
                          s->synth_buf[ch], &(s->synth_buf_offset[ch]),
                          RENAME(ff_mpa_synth_window), &s->dither_state,
                          samples_ptr, s->nb_channels,
diff --git a/libavcodec/mpegaudiodec_float.c b/libavcodec/mpegaudiodec_float.c
index 0ef85d19c1..94463a824e 100644
--- a/libavcodec/mpegaudiodec_float.c
+++ b/libavcodec/mpegaudiodec_float.c
@@ -22,25 +22,6 @@
 #define CONFIG_FLOAT 1
 #include "mpegaudiodec.c"
 
-void ff_mpa_synth_filter_float(MPADecodeContext *s, float *synth_buf_ptr,
-                               int *synth_buf_offset,
-                               float *window, int *dither_state,
-                               float *samples, int incr,
-                               float sb_samples[SBLIMIT])
-{
-    float *synth_buf;
-    int offset;
-
-    offset = *synth_buf_offset;
-    synth_buf = synth_buf_ptr + offset;
-
-    s->dct.dct32(synth_buf, sb_samples);
-    s->apply_window_mp3(synth_buf, window, dither_state, samples, incr);
-
-    offset = (offset - 32) & 511;
-    *synth_buf_offset = offset;
-}
-
 static void compute_antialias_float(MPADecodeContext *s,
                               GranuleDef *g)
 {
diff --git a/libavcodec/mpegaudiodsp.c b/libavcodec/mpegaudiodsp.c
new file mode 100644
index 0000000000..57fe962b91
--- /dev/null
+++ b/libavcodec/mpegaudiodsp.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "mpegaudiodsp.h"
+#include "dct.h"
+#include "dct32.h"
+
+void ff_mpadsp_init(MPADSPContext *s)
+{
+    DCTContext dct;
+
+    ff_dct_init(&dct, 5, DCT_II);
+
+    s->apply_window_float = ff_mpadsp_apply_window_float;
+    s->apply_window_fixed = ff_mpadsp_apply_window_fixed;
+
+    s->dct32_float = dct.dct32;
+    s->dct32_fixed = ff_dct32_fixed;
+
+    if (HAVE_MMX)     ff_mpadsp_init_mmx(s);
+    if (HAVE_ALTIVEC) ff_mpadsp_init_altivec(s);
+}
diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h
new file mode 100644
index 0000000000..7b05b68eee
--- /dev/null
+++ b/libavcodec/mpegaudiodsp.h
@@ -0,0 +1,63 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGAUDIODSP_H
+#define AVCODEC_MPEGAUDIODSP_H
+
+#include <stdint.h>
+
+typedef struct MPADSPContext {
+    void (*apply_window_float)(float *synth_buf, float *window,
+                               int *dither_state, float *samples, int incr);
+    void (*apply_window_fixed)(int32_t *synth_buf, int32_t *window,
+                               int *dither_state, int16_t *samples, int incr);
+    void (*dct32_float)(float *dst, const float *src);
+    void (*dct32_fixed)(int *dst, const int *src);
+} MPADSPContext;
+
+void ff_mpadsp_init(MPADSPContext *s);
+
+extern int32_t ff_mpa_synth_window_fixed[];
+extern float   ff_mpa_synth_window_float[];
+
+void ff_mpa_synth_filter_fixed(MPADSPContext *s,
+                               int32_t *synth_buf_ptr, int *synth_buf_offset,
+                               int32_t *window, int *dither_state,
+                               int16_t *samples, int incr,
+                               int *sb_samples);
+
+void ff_mpa_synth_filter_float(MPADSPContext *s,
+                               float *synth_buf_ptr, int *synth_buf_offset,
+                               float *window, int *dither_state,
+                               float *samples, int incr,
+                               float *sb_samples);
+
+void ff_mpadsp_init_mmx(MPADSPContext *s);
+void ff_mpadsp_init_altivec(MPADSPContext *s);
+
+void ff_mpa_synth_init_float(float *window);
+void ff_mpa_synth_init_fixed(int32_t *window);
+
+void ff_mpadsp_apply_window_float(float *synth_buf, float *window,
+                                  int *dither_state, float *samples,
+                                  int incr);
+void ff_mpadsp_apply_window_fixed(int32_t *synth_buf, int32_t *window,
+                                  int *dither_state, int16_t *samples,
+                                  int incr);
+
+#endif
diff --git a/libavcodec/mpegaudiodsp_fixed.c b/libavcodec/mpegaudiodsp_fixed.c
new file mode 100644
index 0000000000..3c49a568b7
--- /dev/null
+++ b/libavcodec/mpegaudiodsp_fixed.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FLOAT 0
+#include "mpegaudiodsp_template.c"
diff --git a/libavcodec/mpegaudiodsp_float.c b/libavcodec/mpegaudiodsp_float.c
new file mode 100644
index 0000000000..2d8d53ea26
--- /dev/null
+++ b/libavcodec/mpegaudiodsp_float.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FLOAT 1
+#include "mpegaudiodsp_template.c"
diff --git a/libavcodec/mpegaudiodsp_template.c b/libavcodec/mpegaudiodsp_template.c
new file mode 100644
index 0000000000..5561c46135
--- /dev/null
+++ b/libavcodec/mpegaudiodsp_template.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/mem.h"
+#include "dct32.h"
+#include "mathops.h"
+#include "mpegaudiodsp.h"
+#include "mpegaudio.h"
+#include "mpegaudiodata.h"
+
+#if CONFIG_FLOAT
+#define RENAME(n) n##_float
+
+static inline float round_sample(float *sum)
+{
+    float sum1=*sum;
+    *sum = 0;
+    return sum1;
+}
+
+#define MACS(rt, ra, rb) rt+=(ra)*(rb)
+#define MULS(ra, rb) ((ra)*(rb))
+#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
+
+#else
+
+#define RENAME(n) n##_fixed
+#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
+
+static inline int round_sample(int64_t *sum)
+{
+    int sum1;
+    sum1 = (int)((*sum) >> OUT_SHIFT);
+    *sum &= (1<<OUT_SHIFT)-1;
+    return av_clip_int16(sum1);
+}
+
+#   define MULS(ra, rb) MUL64(ra, rb)
+#   define MACS(rt, ra, rb) MAC64(rt, ra, rb)
+#   define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
+#endif
+
+DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256];
+
+#define SUM8(op, sum, w, p)               \
+{                                         \
+    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
+    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
+    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
+    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
+    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
+    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
+    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
+    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
+}
+
+#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
+{                                               \
+    INTFLOAT tmp;\
+    tmp = p[0 * 64];\
+    op1(sum1, (w1)[0 * 64], tmp);\
+    op2(sum2, (w2)[0 * 64], tmp);\
+    tmp = p[1 * 64];\
+    op1(sum1, (w1)[1 * 64], tmp);\
+    op2(sum2, (w2)[1 * 64], tmp);\
+    tmp = p[2 * 64];\
+    op1(sum1, (w1)[2 * 64], tmp);\
+    op2(sum2, (w2)[2 * 64], tmp);\
+    tmp = p[3 * 64];\
+    op1(sum1, (w1)[3 * 64], tmp);\
+    op2(sum2, (w2)[3 * 64], tmp);\
+    tmp = p[4 * 64];\
+    op1(sum1, (w1)[4 * 64], tmp);\
+    op2(sum2, (w2)[4 * 64], tmp);\
+    tmp = p[5 * 64];\
+    op1(sum1, (w1)[5 * 64], tmp);\
+    op2(sum2, (w2)[5 * 64], tmp);\
+    tmp = p[6 * 64];\
+    op1(sum1, (w1)[6 * 64], tmp);\
+    op2(sum2, (w2)[6 * 64], tmp);\
+    tmp = p[7 * 64];\
+    op1(sum1, (w1)[7 * 64], tmp);\
+    op2(sum2, (w2)[7 * 64], tmp);\
+}
+
+void RENAME(ff_mpadsp_apply_window)(MPA_INT *synth_buf, MPA_INT *window,
+                                  int *dither_state, OUT_INT *samples,
+                                  int incr)
+{
+    register const MPA_INT *w, *w2, *p;
+    int j;
+    OUT_INT *samples2;
+#if CONFIG_FLOAT
+    float sum, sum2;
+#else
+    int64_t sum, sum2;
+#endif
+
+    /* copy to avoid wrap */
+    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
+
+    samples2 = samples + 31 * incr;
+    w = window;
+    w2 = window + 31;
+
+    sum = *dither_state;
+    p = synth_buf + 16;
+    SUM8(MACS, sum, w, p);
+    p = synth_buf + 48;
+    SUM8(MLSS, sum, w + 32, p);
+    *samples = round_sample(&sum);
+    samples += incr;
+    w++;
+
+    /* we calculate two samples at the same time to avoid one memory
+       access per two sample */
+    for(j=1;j<16;j++) {
+        sum2 = 0;
+        p = synth_buf + 16 + j;
+        SUM8P2(sum, MACS, sum2, MLSS, w, w2, p);
+        p = synth_buf + 48 - j;
+        SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p);
+
+        *samples = round_sample(&sum);
+        samples += incr;
+        sum += sum2;
+        *samples2 = round_sample(&sum);
+        samples2 -= incr;
+        w++;
+        w2--;
+    }
+
+    p = synth_buf + 32;
+    SUM8(MLSS, sum, w + 32, p);
+    *samples = round_sample(&sum);
+    *dither_state= sum;
+}
+
+/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
+   32 samples. */
+void RENAME(ff_mpa_synth_filter)(MPADSPContext *s, MPA_INT *synth_buf_ptr,
+                                 int *synth_buf_offset,
+                                 MPA_INT *window, int *dither_state,
+                                 OUT_INT *samples, int incr,
+                                 MPA_INT *sb_samples)
+{
+    MPA_INT *synth_buf;
+    int offset;
+
+    offset = *synth_buf_offset;
+    synth_buf = synth_buf_ptr + offset;
+
+    s->RENAME(dct32)(synth_buf, sb_samples);
+    s->RENAME(apply_window)(synth_buf, window, dither_state, samples, incr);
+
+    offset = (offset - 32) & 511;
+    *synth_buf_offset = offset;
+}
+
+void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window)
+{
+    int i, j;
+
+    /* max = 18760, max sum over all 16 coefs : 44736 */
+    for(i=0;i<257;i++) {
+        INTFLOAT v;
+        v = ff_mpa_enwindow[i];
+#if CONFIG_FLOAT
+        v *= 1.0 / (1LL<<(16 + FRAC_BITS));
+#endif
+        window[i] = v;
+        if ((i & 63) != 0)
+            v = -v;
+        if (i != 0)
+            window[512 - i] = v;
+    }
+
+    // Needed for avoiding shuffles in ASM implementations
+    for(i=0; i < 8; i++)
+        for(j=0; j < 16; j++)
+            window[512+16*i+j] = window[64*i+32-j];
+
+    for(i=0; i < 8; i++)
+        for(j=0; j < 16; j++)
+            window[512+128+16*i+j] = window[64*i+48-j];
+}
diff --git a/libavcodec/ppc/mpegaudiodec_altivec.c b/libavcodec/ppc/mpegaudiodec_altivec.c
index af94276e8a..5df0fdafe4 100644
--- a/libavcodec/ppc/mpegaudiodec_altivec.c
+++ b/libavcodec/ppc/mpegaudiodec_altivec.c
@@ -21,9 +21,8 @@
 
 #include "dsputil_altivec.h"
 #include "util_altivec.h"
-
-#define CONFIG_FLOAT 1
-#include "libavcodec/mpegaudio.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/mpegaudiodsp.h"
 
 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
@@ -124,7 +123,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out,
     *out = sum;
 }
 
-void ff_mpegaudiodec_init_altivec(MPADecodeContext *s)
+void ff_mpadsp_init_altivec(MPADSPContext *s)
 {
-    s->apply_window_mp3 = apply_window_mp3;
+    s->apply_window_float = apply_window_mp3;
 }
diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 0f4dd18966..f74cfd9258 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -39,6 +39,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "rdft.h"
+#include "mpegaudiodsp.h"
 #include "mpegaudio.h"
 
 #include "qdm2data.h"
@@ -170,6 +171,7 @@ typedef struct {
     float output_buffer[1024];
 
     /// Synthesis filter
+    MPADSPContext mpadsp;
     DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
     int synth_buf_offset[MPA_MAX_CHANNELS];
     DECLARE_ALIGNED(16, int32_t, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT];
@@ -1616,7 +1618,8 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
         OUT_INT *samples_ptr = samples + ch;
 
         for (i = 0; i < 8; i++) {
-            ff_mpa_synth_filter_fixed(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
+            ff_mpa_synth_filter_fixed(&q->mpadsp,
+                q->synth_buf[ch], &(q->synth_buf_offset[ch]),
                 ff_mpa_synth_window_fixed, &dither_state,
                 samples_ptr, q->nb_channels,
                 q->sb_samples[ch][(8 * index) + i]);
@@ -1863,6 +1866,7 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
     }
 
     ff_rdft_init(&s->rdft_ctx, s->fft_order, IDFT_C2R);
+    ff_mpadsp_init(&s->mpadsp);
 
     qdm2_init(s);
 
diff --git a/libavcodec/x86/mpegaudiodec_mmx.c b/libavcodec/x86/mpegaudiodec_mmx.c
index ce5b7d6df8..b64461513e 100644
--- a/libavcodec/x86/mpegaudiodec_mmx.c
+++ b/libavcodec/x86/mpegaudiodec_mmx.c
@@ -21,9 +21,8 @@
 
 #include "libavutil/cpu.h"
 #include "libavutil/x86_cpu.h"
-
-#define CONFIG_FLOAT 1
-#include "libavcodec/mpegaudio.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/mpegaudiodsp.h"
 
 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
@@ -148,11 +147,11 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out,
     *out = sum;
 }
 
-void ff_mpegaudiodec_init_mmx(MPADecodeContext *s)
+void ff_mpadsp_init_mmx(MPADSPContext *s)
 {
     int mm_flags = av_get_cpu_flags();
 
     if (mm_flags & AV_CPU_FLAG_SSE2) {
-        s->apply_window_mp3 = apply_window_mp3;
+        s->apply_window_float = apply_window_mp3;
     }
 }

From 918d0584a452bf76264e717006f2cfc40b0de21f Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 17 May 2011 14:22:25 +0100
Subject: [PATCH 102/830] mpegaudio: move some struct definitions from
 mpegaudio.h

These structs are only used in mpegaudiodec.c, so move them there
and remove no longer needed #include lines from mpegaudio.h.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudio.h       | 55 ------------------------------------
 libavcodec/mpegaudiodec.c    | 46 ++++++++++++++++++++++++++++++
 libavcodec/mpegaudiodectab.h |  7 +++++
 3 files changed, 53 insertions(+), 55 deletions(-)

diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index c33960e987..8c6d6ef066 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -31,8 +31,6 @@
 #endif
 
 #include "avcodec.h"
-#include "get_bits.h"
-#include "dsputil.h"
 
 /* max frame size, in samples */
 #define MPA_FRAME_SIZE 1152
@@ -81,29 +79,6 @@ typedef int16_t MPA_INT;
 typedef int32_t MPA_INT;
 #endif
 
-#define BACKSTEP_SIZE 512
-#define EXTRABYTES 24
-
-/* layer 3 "granule" */
-typedef struct GranuleDef {
-    uint8_t scfsi;
-    int part2_3_length;
-    int big_values;
-    int global_gain;
-    int scalefac_compress;
-    uint8_t block_type;
-    uint8_t switch_point;
-    int table_select[3];
-    int subblock_gain[3];
-    uint8_t scalefac_scale;
-    uint8_t count1table_select;
-    int region_size[3]; /* number of huffman codes in each region */
-    int preflag;
-    int short_start, long_end; /* long/short band indexes */
-    uint8_t scale_factors[40];
-    INTFLOAT sb_hybrid[SBLIMIT * 18]; /* 576 samples */
-} GranuleDef;
-
 #define MPA_DECODE_HEADER \
     int frame_size; \
     int error_protection; \
@@ -120,36 +95,6 @@ typedef struct MPADecodeHeader {
   MPA_DECODE_HEADER
 } MPADecodeHeader;
 
-typedef struct MPADecodeContext {
-    MPA_DECODE_HEADER
-    uint8_t last_buf[2*BACKSTEP_SIZE + EXTRABYTES];
-    int last_buf_size;
-    /* next header (used in free format parsing) */
-    uint32_t free_format_next_header;
-    GetBitContext gb;
-    GetBitContext in_gb;
-    DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
-    int synth_buf_offset[MPA_MAX_CHANNELS];
-    DECLARE_ALIGNED(16, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
-    INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
-    GranuleDef granules[2][2]; /* Used in Layer 3 */
-#ifdef DEBUG
-    int frame_count;
-#endif
-    int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
-    int dither_state;
-    int error_recognition;
-    AVCodecContext* avctx;
-    MPADSPContext mpadsp;
-} MPADecodeContext;
-
-/* layer 3 huffman tables */
-typedef struct HuffTable {
-    int xsize;
-    const uint8_t *bits;
-    const uint16_t *codes;
-} HuffTable;
-
 int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
 int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
 
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index cc193c68d0..f0d9958d2b 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -39,6 +39,52 @@
 #include "mpegaudio.h"
 #include "mpegaudiodecheader.h"
 
+#define BACKSTEP_SIZE 512
+#define EXTRABYTES 24
+
+/* layer 3 "granule" */
+typedef struct GranuleDef {
+    uint8_t scfsi;
+    int part2_3_length;
+    int big_values;
+    int global_gain;
+    int scalefac_compress;
+    uint8_t block_type;
+    uint8_t switch_point;
+    int table_select[3];
+    int subblock_gain[3];
+    uint8_t scalefac_scale;
+    uint8_t count1table_select;
+    int region_size[3]; /* number of huffman codes in each region */
+    int preflag;
+    int short_start, long_end; /* long/short band indexes */
+    uint8_t scale_factors[40];
+    INTFLOAT sb_hybrid[SBLIMIT * 18]; /* 576 samples */
+} GranuleDef;
+
+typedef struct MPADecodeContext {
+    MPA_DECODE_HEADER
+    uint8_t last_buf[2*BACKSTEP_SIZE + EXTRABYTES];
+    int last_buf_size;
+    /* next header (used in free format parsing) */
+    uint32_t free_format_next_header;
+    GetBitContext gb;
+    GetBitContext in_gb;
+    DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
+    int synth_buf_offset[MPA_MAX_CHANNELS];
+    DECLARE_ALIGNED(16, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
+    INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
+    GranuleDef granules[2][2]; /* Used in Layer 3 */
+#ifdef DEBUG
+    int frame_count;
+#endif
+    int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
+    int dither_state;
+    int error_recognition;
+    AVCodecContext* avctx;
+    MPADSPContext mpadsp;
+} MPADecodeContext;
+
 #if CONFIG_FLOAT
 #   define SHR(a,b)       ((a)*(1.0f/(1<<(b))))
 #   define FIXR_OLD(a)    ((int)((a) * FRAC_ONE + 0.5))
diff --git a/libavcodec/mpegaudiodectab.h b/libavcodec/mpegaudiodectab.h
index fdcf83fb88..041d1860b7 100644
--- a/libavcodec/mpegaudiodectab.h
+++ b/libavcodec/mpegaudiodectab.h
@@ -33,6 +33,13 @@
 /*******************************************************/
 /* layer 3 tables */
 
+/* layer 3 huffman tables */
+typedef struct HuffTable {
+    int xsize;
+    const uint8_t *bits;
+    const uint16_t *codes;
+} HuffTable;
+
 /* layer3 scale factor size */
 static const uint8_t slen_table[2][16] = {
     { 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },

From c7bbc6cd7a7e26c6c6f26e1b06f8ce354c7a1a46 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 18:59:25 +0100
Subject: [PATCH 103/830] mpegaudio: merge two #if CONFIG_FLOAT blocks

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudio.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 8c6d6ef066..5929db8687 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -62,21 +62,18 @@
 
 #define FIX(a)   ((int)((a) * FRAC_ONE))
 
-#if CONFIG_FLOAT
-typedef float OUT_INT;
-#else
-typedef int16_t OUT_INT;
-#endif
-
 #if CONFIG_FLOAT
 #   define INTFLOAT float
 typedef float MPA_INT;
+typedef float OUT_INT;
 #elif FRAC_BITS <= 15
 #   define INTFLOAT int
 typedef int16_t MPA_INT;
+typedef int16_t OUT_INT;
 #else
 #   define INTFLOAT int
 typedef int32_t MPA_INT;
+typedef int16_t OUT_INT;
 #endif
 
 #define MPA_DECODE_HEADER \

From b122c651075814722ade6f93c46cb2ee08c45b49 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 12:31:05 +0100
Subject: [PATCH 104/830] asfdec: add missing #include for av_bswap32()

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavformat/asfdec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index ed02d40fb9..30642a61fa 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -21,6 +21,7 @@
 
 //#define DEBUG
 
+#include "libavutil/bswap.h"
 #include "libavutil/common.h"
 #include "libavutil/avstring.h"
 #include "libavcodec/mpegaudio.h"

From d7d21c9f4befe8fea9596e41e691a9fba8f377e2 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 12:48:33 +0100
Subject: [PATCH 105/830] mpegaudio: remove useless #undef at end of file

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudioenc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/mpegaudioenc.c b/libavcodec/mpegaudioenc.c
index e6f2e3b9c2..50876ec2a4 100644
--- a/libavcodec/mpegaudioenc.c
+++ b/libavcodec/mpegaudioenc.c
@@ -776,5 +776,3 @@ AVCodec ff_mp2_encoder = {
     .supported_samplerates= (const int[]){44100, 48000,  32000, 22050, 24000, 16000, 0},
     .long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
 };
-
-#undef FIX

From c2a16e44f8ed130c2b492f1f3ce09f7f55a7d4a4 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 14:01:11 +0100
Subject: [PATCH 106/830] mpegaudio: remove unused version of SAME_HEADER_MASK

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudio.h        | 4 ----
 libavcodec/mpegaudio_parser.c | 1 -
 2 files changed, 5 deletions(-)

diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 5929db8687..a46ecc5a9f 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -47,10 +47,6 @@
 #define MPA_DUAL    2
 #define MPA_MONO    3
 
-/* header + layer + bitrate + freq + lsf/mpeg25 */
-#define SAME_HEADER_MASK \
-   (0xffe00000 | (3 << 17) | (0xf << 12) | (3 << 10) | (3 << 19))
-
 #define MP3_MASK 0xFFFE0CCF
 
 #ifndef FRAC_BITS
diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c
index 3bf1a18636..7cfd107d53 100644
--- a/libavcodec/mpegaudio_parser.c
+++ b/libavcodec/mpegaudio_parser.c
@@ -35,7 +35,6 @@ typedef struct MpegAudioParseContext {
 #define MPA_HEADER_SIZE 4
 
 /* header + layer + bitrate + freq + lsf/mpeg25 */
-#undef SAME_HEADER_MASK /* mpegaudio.h defines different version */
 #define SAME_HEADER_MASK \
    (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19))
 

From 429059f866259b447233c3fa23fffddce3e39508 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 16 May 2011 14:23:59 +0200
Subject: [PATCH 107/830] configure: Adjust AVX assembler check.

Older nasm versions have trouble assembling certain AVX instructions, but the
current AVX check did not detect this. Update the check to use an instruction
that triggers the nasm problem.
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 6becb09724..96867a550e 100755
--- a/configure
+++ b/configure
@@ -2729,7 +2729,7 @@ EOF
 
         check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
             die "yasm not found, use --disable-yasm for a crippled build"
-        check_yasm "vpaddw xmm0, xmm0, xmm0" || disable avx
+        check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
     fi
 
     case "$cpu" in

From 0b5e44ed2922f2abe0de9670d099666ca3622a69 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 16:26:39 +0100
Subject: [PATCH 108/830] mpegaudiodsp: fix x86 and ppc makefiles

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/ppc/Makefile | 6 +-----
 libavcodec/x86/Makefile | 6 +-----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 35ea0c38f8..8e37fc791d 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -7,11 +7,7 @@ ALTIVEC-OBJS-$(CONFIG_VP5_DECODER)     += ppc/vp3dsp_altivec.o
 ALTIVEC-OBJS-$(CONFIG_VP6_DECODER)     += ppc/vp3dsp_altivec.o
 ALTIVEC-OBJS-$(CONFIG_VP8_DECODER)     += ppc/vp8dsp_altivec.o
 
-ALTIVEC-OBJS-$(CONFIG_MP1FLOAT_DECODER)    += ppc/mpegaudiodec_altivec.o
-ALTIVEC-OBJS-$(CONFIG_MP2FLOAT_DECODER)    += ppc/mpegaudiodec_altivec.o
-ALTIVEC-OBJS-$(CONFIG_MP3FLOAT_DECODER)    += ppc/mpegaudiodec_altivec.o
-ALTIVEC-OBJS-$(CONFIG_MP3ON4FLOAT_DECODER) += ppc/mpegaudiodec_altivec.o
-ALTIVEC-OBJS-$(CONFIG_MP3ADUFLOAT_DECODER) += ppc/mpegaudiodec_altivec.o
+ALTIVEC-OBJS-$(CONFIG_MPEGAUDIODSP)    += ppc/mpegaudiodec_altivec.o
 
 FFT-OBJS-$(HAVE_GNU_AS)                += ppc/fft_altivec_s.o           \
 
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 1cde9517a5..5f428501e3 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -21,11 +21,7 @@ YASM-OBJS-$(CONFIG_VC1_DECODER)        += x86/vc1dsp_yasm.o
 MMX-OBJS-$(CONFIG_AC3DSP)              += x86/ac3dsp_mmx.o
 YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
 MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
-MMX-OBJS-$(CONFIG_MP1FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
-MMX-OBJS-$(CONFIG_MP2FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
-MMX-OBJS-$(CONFIG_MP3FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
-MMX-OBJS-$(CONFIG_MP3ON4FLOAT_DECODER) += x86/mpegaudiodec_mmx.o
-MMX-OBJS-$(CONFIG_MP3ADUFLOAT_DECODER) += x86/mpegaudiodec_mmx.o
+MMX-OBJS-$(CONFIG_MPEGAUDIODSP)        += x86/mpegaudiodec_mmx.o
 MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc_yasm.o
 MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o

From 7ed0a77ff4d19eb77671ce786ef515e5ffb1496e Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 19 May 2011 17:39:50 +0200
Subject: [PATCH 109/830] Remove silly insults from avformat_version() Doxygen
 documentation.

---
 libavformat/avformat.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index aca246d95a..3d4cc68f89 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -23,8 +23,7 @@
 
 
 /**
- * I return the LIBAVFORMAT_VERSION_INT constant.  You got
- * a fucking problem with that, douchebag?
+ * Return the LIBAVFORMAT_VERSION_INT constant.
  */
 unsigned avformat_version(void);
 

From d9a69f730e180ab9b1cac8a4d53d07b1ec375a38 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 16 May 2011 00:54:36 +0200
Subject: [PATCH 110/830] Simplify CLEANFILES make variable by using wildcards.

Also ensures that generated file cos_fixed_tables.c is deleted on 'make clean'.
---
 libavcodec/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index b26c33de63..4765ceb413 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -670,7 +670,7 @@ HOSTPROGS = costablegen
 
 DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86
 
-CLEANFILES = sin_tables.c cos_tables.c *_tables.h *_tablegen$(HOSTEXESUF)
+CLEANFILES = *_tables.c *_tables.h *_tablegen$(HOSTEXESUF)
 
 include $(SUBDIR)../subdir.mak
 

From 8a0572b05451b9a161989ab1ec2c03447a4c1ce6 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 16 May 2011 00:59:50 +0200
Subject: [PATCH 111/830] Ignore generated tables and generated table generator
 programs.

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 3d5e38b6bd..8887980bc0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,9 @@ ffmpeg
 ffplay
 ffprobe
 ffserver
+libavcodec/*_tablegen
+libavcodec/*_tables.c
+libavcodec/*_tables.h
 libavcodec/libavcodec*
 libavdevice/libavdevice*
 libavfilter/libavfilter*

From c98657a21536ef71a1d8d6a7ff4d69dd19d9e5b7 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 19 May 2011 18:12:17 +0200
Subject: [PATCH 112/830] Remove potentially unstable filenames from comments
 in generated files.

---
 libavcodec/costablegen.c | 2 +-
 libavcodec/tableprint.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/costablegen.c b/libavcodec/costablegen.c
index 65c492696b..6bfb8eabf2 100644
--- a/libavcodec/costablegen.c
+++ b/libavcodec/costablegen.c
@@ -54,7 +54,7 @@ int main(int argc, char *argv[])
     int fixed  = argc > 2 && !strcmp(argv[2], "fixed");
     double (*func)(double) = do_sin ? sin : cos;
 
-    printf("/* This file was generated by libavcodec/costablegen */\n");
+    printf("/* This file was automatically generated. */\n");
     printf("#define CONFIG_FFT_FLOAT %d\n", !fixed);
     printf("#include \"libavcodec/%s\"\n", do_sin ? "rdft.h" : "fft.h");
     for (i = 4; i <= BITS; i++) {
diff --git a/libavcodec/tableprint.c b/libavcodec/tableprint.c
index da77525626..362dc24c50 100644
--- a/libavcodec/tableprint.c
+++ b/libavcodec/tableprint.c
@@ -36,6 +36,6 @@ WRITE_2D_FUNC(uint32_t)
 WRITE_2D_FUNC(float)
 
 void write_fileheader(void) {
-    printf("/* This file was generated by libavcodec/tableprint */\n");
+    printf("/* This file was automatically generated. */\n");
     printf("#include <stdint.h>\n");
 }

From 272874c9dbde5d48884c417b76f3c7f04938c92f Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 18:34:40 +0100
Subject: [PATCH 113/830] Simplify trig table rules

This collapses the make rules for the trig tables into a pattern
rule.  Based on a patch by Diego, modified to avoid using fragile
make constructs and allow future addition of fixed-point sin tables.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/Makefile      | 11 ++++-------
 libavcodec/costablegen.c |  2 +-
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 4765ceb413..7a9d897606 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -676,14 +676,11 @@ include $(SUBDIR)../subdir.mak
 
 $(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o
 
-$(SUBDIR)cos_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF)
-	$(M)./$< > $@
+TRIG_TABLES  = cos cos_fixed sin
+TRIG_TABLES := $(TRIG_TABLES:%=$(SUBDIR)%_tables.c)
 
-$(SUBDIR)cos_fixed_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF)
-	$(M)./$< cos fixed > $@
-
-$(SUBDIR)sin_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF)
-	$(M)./$< sin > $@
+$(TRIG_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF)
+	$(M)./$< $* > $@
 
 ifdef CONFIG_SMALL
 $(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=1
diff --git a/libavcodec/costablegen.c b/libavcodec/costablegen.c
index 6bfb8eabf2..5e52c482c6 100644
--- a/libavcodec/costablegen.c
+++ b/libavcodec/costablegen.c
@@ -51,7 +51,7 @@ int main(int argc, char *argv[])
 {
     int i, j;
     int do_sin = argc > 1 && !strcmp(argv[1], "sin");
-    int fixed  = argc > 2 && !strcmp(argv[2], "fixed");
+    int fixed  = argc > 1 &&  strstr(argv[1], "fixed");
     double (*func)(double) = do_sin ? sin : cos;
 
     printf("/* This file was automatically generated. */\n");

From 9f2405661d5bcc9416f4b3339f1139997467e1f5 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 19 May 2011 19:22:41 +0200
Subject: [PATCH 114/830] Collapse tableprint.c into tableprint.h.

tableprint.c serves little purpose on its own and removing it allows building
the table generator programs with the normal HOSTPROGS Makefile rules.
---
 libavcodec/Makefile     |  7 +++----
 libavcodec/tableprint.c | 41 -----------------------------------------
 libavcodec/tableprint.h | 24 ++++++++++++++++++++----
 3 files changed, 23 insertions(+), 49 deletions(-)
 delete mode 100644 libavcodec/tableprint.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 7a9d897606..d0abe69202 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -666,7 +666,9 @@ TESTPROGS = cabac dct eval fft fft-fixed h264 iirfilter rangecoder snow
 TESTPROGS-$(HAVE_MMX) += motion
 TESTOBJS = dctref.o
 
-HOSTPROGS = costablegen
+HOSTPROGS = aac_tablegen aacps_tablegen cbrt_tablegen costablegen       \
+            dv_tablegen motionpixels_tablegen mpegaudio_tablegen        \
+            pcm_tablegen qdm2_tablegen sinewin_tablegen
 
 DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86
 
@@ -688,9 +690,6 @@ else
 $(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=0
 endif
 
-$(SUBDIR)%_tablegen$(HOSTEXESUF): $(SUBDIR)%_tablegen.c $(SUBDIR)%_tablegen.h $(SUBDIR)tableprint.c
-	$(HOSTCC) $(HOSTCFLAGS) $(HOSTLDFLAGS) -o $@ $(filter %.c,$^) $(HOSTLIBS)
-
 GEN_HEADERS = cbrt_tables.h aacps_tables.h aac_tables.h dv_tables.h     \
               sinewin_tables.h mpegaudio_tables.h motionpixels_tables.h \
               pcm_tables.h qdm2_tables.h
diff --git a/libavcodec/tableprint.c b/libavcodec/tableprint.c
deleted file mode 100644
index 362dc24c50..0000000000
--- a/libavcodec/tableprint.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Generate a file for hardcoded tables
- *
- * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <inttypes.h>
-#include "tableprint.h"
-
-WRITE_1D_FUNC(int8_t,   "%3"PRIi8, 15)
-WRITE_1D_FUNC(uint8_t,  "0x%02"PRIx8, 15)
-WRITE_1D_FUNC(uint16_t, "0x%08"PRIx16, 7)
-WRITE_1D_FUNC(uint32_t, "0x%08"PRIx32, 7)
-WRITE_1D_FUNC(float,    "%.18e", 3)
-
-WRITE_2D_FUNC(int8_t)
-WRITE_2D_FUNC(uint8_t)
-WRITE_2D_FUNC(uint32_t)
-WRITE_2D_FUNC(float)
-
-void write_fileheader(void) {
-    printf("/* This file was automatically generated. */\n");
-    printf("#include <stdint.h>\n");
-}
diff --git a/libavcodec/tableprint.h b/libavcodec/tableprint.h
index af69fe8580..ddf2635da0 100644
--- a/libavcodec/tableprint.h
+++ b/libavcodec/tableprint.h
@@ -23,8 +23,9 @@
 #ifndef AVCODEC_TABLEPRINT_H
 #define AVCODEC_TABLEPRINT_H
 
-#include <stdint.h>
+#include <inttypes.h>
 #include <stdio.h>
+
 #include "libavutil/common.h"
 
 #define WRITE_1D_FUNC_ARGV(type, linebrk, fmtstr, ...)\
@@ -70,9 +71,6 @@ void write_uint32_t_2d_array(const void *, int, int);
 void write_float_2d_array   (const void *, int, int);
 /** \} */ // end of printfuncs group
 
-/** Write a standard file header */
-void write_fileheader(void);
-
 #define WRITE_ARRAY(prefix, type, name)                 \
     do {                                                \
         const size_t array_size = FF_ARRAY_ELEMS(name); \
@@ -92,4 +90,22 @@ void write_fileheader(void);
         printf("};\n");                                                 \
     } while(0)
 
+
+WRITE_1D_FUNC(int8_t,   "%3"PRIi8, 15)
+WRITE_1D_FUNC(uint8_t,  "0x%02"PRIx8, 15)
+WRITE_1D_FUNC(uint16_t, "0x%08"PRIx16, 7)
+WRITE_1D_FUNC(uint32_t, "0x%08"PRIx32, 7)
+WRITE_1D_FUNC(float,    "%.18e", 3)
+
+WRITE_2D_FUNC(int8_t)
+WRITE_2D_FUNC(uint8_t)
+WRITE_2D_FUNC(uint32_t)
+WRITE_2D_FUNC(float)
+
+static inline void write_fileheader(void)
+{
+    printf("/* This file was automatically generated. */\n");
+    printf("#include <stdint.h>\n");
+}
+
 #endif /* AVCODEC_TABLEPRINT_H */

From 4887f8245c5dcd2a27817a678122eea4d1a0f31a Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 16 May 2011 00:53:07 +0200
Subject: [PATCH 115/830] Rename costablegen.c ---> cos_tablegen.c.

This is consistent with how all other table generation programs are named.
Moreover this ensures that the cos table generation program is correctly
deleted when cleaning the tree.
---
 libavcodec/Makefile                          | 4 ++--
 libavcodec/{costablegen.c => cos_tablegen.c} | 0
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename libavcodec/{costablegen.c => cos_tablegen.c} (100%)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index d0abe69202..6cb59a3c43 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -666,7 +666,7 @@ TESTPROGS = cabac dct eval fft fft-fixed h264 iirfilter rangecoder snow
 TESTPROGS-$(HAVE_MMX) += motion
 TESTOBJS = dctref.o
 
-HOSTPROGS = aac_tablegen aacps_tablegen cbrt_tablegen costablegen       \
+HOSTPROGS = aac_tablegen aacps_tablegen cbrt_tablegen cos_tablegen      \
             dv_tablegen motionpixels_tablegen mpegaudio_tablegen        \
             pcm_tablegen qdm2_tablegen sinewin_tablegen
 
@@ -681,7 +681,7 @@ $(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o
 TRIG_TABLES  = cos cos_fixed sin
 TRIG_TABLES := $(TRIG_TABLES:%=$(SUBDIR)%_tables.c)
 
-$(TRIG_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF)
+$(TRIG_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)cos_tablegen$(HOSTEXESUF)
 	$(M)./$< $* > $@
 
 ifdef CONFIG_SMALL
diff --git a/libavcodec/costablegen.c b/libavcodec/cos_tablegen.c
similarity index 100%
rename from libavcodec/costablegen.c
rename to libavcodec/cos_tablegen.c

From 6070b7e1c520e9ca389403bae20a2ad04c7d54c7 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 7 May 2011 21:35:08 +0200
Subject: [PATCH 116/830] vsrc_buffer: remove dependency on AVFrame

Rename av_vsrc_buffer_add_frame() to
av_vsrc_buffer_add_video_buffer_ref(), and change its interface to make
it accept an AVFilterBufferRef as input rather than an AVFrame.

This way the interface can be used without requiring the
inclusion/installation of libavcodec headers.
---
 ffmpeg.c                  | 11 ++++++--
 libavfilter/vsrc_buffer.c | 57 ++++++++++++++++-----------------------
 libavfilter/vsrc_buffer.h |  7 +++--
 3 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 90c7d71d2f..340a947dd8 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1643,13 +1643,20 @@ static int output_packet(AVInputStream *ist, int ist_index,
 #if CONFIG_AVFILTER
         if(ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
             for(i=0;i<nb_ostreams;i++) {
+                AVFilterBufferRef *picref;
                 ost = ost_table[i];
                 if (ost->input_video_filter && ost->source_index == ist_index) {
                     if (!picture.sample_aspect_ratio.num)
                         picture.sample_aspect_ratio = ist->st->sample_aspect_ratio;
                     picture.pts = ist->pts;
-                    // add it to be filtered
-                    av_vsrc_buffer_add_frame2(ost->input_video_filter, &picture, ""); //TODO user setable params
+
+                    picref = avfilter_get_video_buffer_ref_from_arrays(
+                        picture.data, picture.linesize, AV_PERM_WRITE,
+                        picture.width, picture.height, picture.format);
+                    avfilter_copy_frame_props(picref, &picture);
+                    av_vsrc_buffer_add_video_buffer_ref2(ost->input_video_filter, picref, ""); //TODO user setable params
+                    picref->buf->data[0] = NULL;
+                    avfilter_unref_buffer(picref);
                 }
             }
         }
diff --git a/libavfilter/vsrc_buffer.c b/libavfilter/vsrc_buffer.c
index 84f3b33c3f..b593a207de 100644
--- a/libavfilter/vsrc_buffer.c
+++ b/libavfilter/vsrc_buffer.c
@@ -29,8 +29,7 @@
 #include "libavutil/imgutils.h"
 
 typedef struct {
-    AVFrame           frame;
-    int               has_frame;
+    AVFilterBufferRef *picref;
     int               h, w;
     enum PixelFormat  pix_fmt;
     AVRational        time_base;     ///< time_base to set in the output link
@@ -38,13 +37,14 @@ typedef struct {
     char              sws_param[256];
 } BufferSourceContext;
 
-int av_vsrc_buffer_add_frame2(AVFilterContext *buffer_filter, AVFrame *frame,
-                              const char *sws_param)
+int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilterBufferRef *picref,
+                                         const char *sws_param)
 {
     BufferSourceContext *c = buffer_filter->priv;
+    AVFilterLink *outlink = buffer_filter->outputs[0];
     int ret;
 
-    if (c->has_frame) {
+    if (c->picref) {
         av_log(buffer_filter, AV_LOG_ERROR,
                "Buffering several frames is not supported. "
                "Please consume all available frames before adding a new one.\n"
@@ -56,14 +56,14 @@ int av_vsrc_buffer_add_frame2(AVFilterContext *buffer_filter, AVFrame *frame,
         snprintf(c->sws_param, 255, "%d:%d:%s", c->w, c->h, sws_param);
     }
 
-    if (frame->width != c->w || frame->height != c->h || frame->format != c->pix_fmt) {
+    if (picref->video->w != c->w || picref->video->h != c->h || picref->format != c->pix_fmt) {
         AVFilterContext *scale= buffer_filter->outputs[0]->dst;
         AVFilterLink *link;
 
         av_log(buffer_filter, AV_LOG_INFO,
                "Buffer video input changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s\n",
                c->w, c->h, av_pix_fmt_descriptors[c->pix_fmt].name,
-               frame->width, frame->height, av_pix_fmt_descriptors[frame->format].name);
+               picref->video->w, picref->video->h, av_pix_fmt_descriptors[picref->format].name);
 
         if(!scale || strcmp(scale->filter->name,"scale")){
             AVFilter *f= avfilter_get_by_name("scale");
@@ -89,26 +89,28 @@ int av_vsrc_buffer_add_frame2(AVFilterContext *buffer_filter, AVFrame *frame,
             scale->filter->init(scale, c->sws_param, NULL);
         }
 
-        c->pix_fmt = scale->inputs[0]->format = frame->format;
-        c->w       = scale->inputs[0]->w      = frame->width;
-        c->h       = scale->inputs[0]->h      = frame->height;
+        c->pix_fmt = scale->inputs[0]->format = picref->format;
+        c->w       = scale->inputs[0]->w      = picref->video->w;
+        c->h       = scale->inputs[0]->h      = picref->video->h;
 
         link= scale->outputs[0];
         if ((ret =  link->srcpad->config_props(link)) < 0)
             return ret;
     }
 
-    c->frame = *frame;
-    memcpy(c->frame.data    , frame->data    , sizeof(frame->data));
-    memcpy(c->frame.linesize, frame->linesize, sizeof(frame->linesize));
-    c->has_frame = 1;
+    c->picref = avfilter_get_video_buffer(outlink, AV_PERM_WRITE,
+                                          picref->video->w, picref->video->h);
+    av_image_copy(c->picref->data, c->picref->linesize,
+                  picref->data, picref->linesize,
+                  picref->format, picref->video->w, picref->video->h);
+    avfilter_copy_buffer_ref_props(c->picref, picref);
 
     return 0;
 }
 
-int av_vsrc_buffer_add_frame(AVFilterContext *buffer_filter, AVFrame *frame)
+int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, AVFilterBufferRef *picref)
 {
-    return av_vsrc_buffer_add_frame2(buffer_filter, frame, "");
+    return av_vsrc_buffer_add_video_buffer_ref2(buffer_filter, picref, "");
 }
 
 static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
@@ -164,31 +166,18 @@ static int config_props(AVFilterLink *link)
 static int request_frame(AVFilterLink *link)
 {
     BufferSourceContext *c = link->src->priv;
-    AVFilterBufferRef *picref;
 
-    if (!c->has_frame) {
+    if (!c->picref) {
         av_log(link->src, AV_LOG_ERROR,
                "request_frame() called with no available frame!\n");
         //return -1;
     }
 
-    /* This picture will be needed unmodified later for decoding the next
-     * frame */
-    picref = avfilter_get_video_buffer(link, AV_PERM_WRITE | AV_PERM_PRESERVE |
-                                       AV_PERM_REUSE2,
-                                       link->w, link->h);
-
-    av_image_copy(picref->data, picref->linesize,
-                  c->frame.data, c->frame.linesize,
-                  picref->format, link->w, link->h);
-    avfilter_copy_frame_props(picref, &c->frame);
-
-    avfilter_start_frame(link, avfilter_ref_buffer(picref, ~0));
+    avfilter_start_frame(link, avfilter_ref_buffer(c->picref, ~0));
     avfilter_draw_slice(link, 0, link->h, 1);
     avfilter_end_frame(link);
-    avfilter_unref_buffer(picref);
-
-    c->has_frame = 0;
+    avfilter_unref_buffer(c->picref);
+    c->picref = NULL;
 
     return 0;
 }
@@ -196,7 +185,7 @@ static int request_frame(AVFilterLink *link)
 static int poll_frame(AVFilterLink *link)
 {
     BufferSourceContext *c = link->src->priv;
-    return !!(c->has_frame);
+    return !!(c->picref);
 }
 
 AVFilter avfilter_vsrc_buffer = {
diff --git a/libavfilter/vsrc_buffer.h b/libavfilter/vsrc_buffer.h
index 2dda546e01..eb9ec56edd 100644
--- a/libavfilter/vsrc_buffer.h
+++ b/libavfilter/vsrc_buffer.h
@@ -27,12 +27,11 @@
  * memory buffer source API for video
  */
 
-#include "libavcodec/avcodec.h" /* AVFrame */
 #include "avfilter.h"
 
-int av_vsrc_buffer_add_frame(AVFilterContext *buffer_filter, AVFrame *frame);
+int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, AVFilterBufferRef *picref);
 
-int av_vsrc_buffer_add_frame2(AVFilterContext *buffer_filter, AVFrame *frame,
-                              const char *sws_param);
+int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilterBufferRef *picref,
+                                         const char *sws_param);
 
 #endif /* AVFILTER_VSRC_BUFFER_H */

From 9fdf77217b39646afdb8907b977e3d7a59f1cb9e Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 14 May 2011 11:46:14 +0200
Subject: [PATCH 117/830] lavfi: add avfilter_get_video_buffer_ref_from_frame
 to avcodec.h

Simplify passing AVFrame data to av_vsrc_buffer_add_video_buffer_ref().
---
 doc/APIchanges         |  3 +++
 ffmpeg.c               |  8 +++-----
 libavfilter/avcodec.c  | 13 +++++++++++++
 libavfilter/avcodec.h  |  8 ++++++++
 libavfilter/avfilter.h |  2 +-
 5 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index d9e7d326bd..cd1f88894d 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-05-XX - XXXXXX - lavfi 2.6.0 - avcodec.h
+  Add avfilter_get_video_buffer_ref_from_frame() to libavfilter/avcodec.h.
+
 2011-05-18 - 64150ff - lavc 53.4.0 - AVCodecContext.request_sample_fmt
   Add request_sample_fmt field to AVCodecContext.
 
diff --git a/ffmpeg.c b/ffmpeg.c
index 340a947dd8..d4c7705a09 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1650,11 +1650,9 @@ static int output_packet(AVInputStream *ist, int ist_index,
                         picture.sample_aspect_ratio = ist->st->sample_aspect_ratio;
                     picture.pts = ist->pts;
 
-                    picref = avfilter_get_video_buffer_ref_from_arrays(
-                        picture.data, picture.linesize, AV_PERM_WRITE,
-                        picture.width, picture.height, picture.format);
-                    avfilter_copy_frame_props(picref, &picture);
-                    av_vsrc_buffer_add_video_buffer_ref2(ost->input_video_filter, picref, ""); //TODO user setable params
+                    picref =
+                        avfilter_get_video_buffer_ref_from_frame(&picture, AV_PERM_WRITE);
+                    av_vsrc_buffer_add_video_buffer_ref(ost->input_video_filter, picref, ""); //TODO user setable params
                     picref->buf->data[0] = NULL;
                     avfilter_unref_buffer(picref);
                 }
diff --git a/libavfilter/avcodec.c b/libavfilter/avcodec.c
index c2f8651106..50670bc55e 100644
--- a/libavfilter/avcodec.c
+++ b/libavfilter/avcodec.c
@@ -40,3 +40,16 @@ void avfilter_copy_frame_props(AVFilterBufferRef *dst, const AVFrame *src)
         dst->video->pict_type           = src->pict_type;
     }
 }
+
+AVFilterBufferRef *avfilter_get_video_buffer_ref_from_frame(const AVFrame *frame,
+                                                            int perms)
+{
+    AVFilterBufferRef *picref =
+        avfilter_get_video_buffer_ref_from_arrays(frame->data, frame->linesize, perms,
+                                                  frame->width, frame->height,
+                                                  frame->format);
+    if (!picref)
+        return NULL;
+    avfilter_copy_frame_props(picref, frame);
+    return picref;
+}
diff --git a/libavfilter/avcodec.h b/libavfilter/avcodec.h
index f438860d0b..e6ae907391 100644
--- a/libavfilter/avcodec.h
+++ b/libavfilter/avcodec.h
@@ -37,4 +37,12 @@
  */
 void avfilter_copy_frame_props(AVFilterBufferRef *dst, const AVFrame *src);
 
+/**
+ * Create and return a picref reference from the data and properties
+ * contained in frame.
+ *
+ * @param perms permissions to assign to the new buffer reference
+ */
+AVFilterBufferRef *avfilter_get_video_buffer_ref_from_frame(const AVFrame *frame, int perms);
+
 #endif /* AVFILTER_AVCODEC_H */
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 03a1e49a46..171c9e4c74 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR  5
+#define LIBAVFILTER_VERSION_MINOR  6
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \

From 3799805e560f168811808ee0ba3befcb6fddc50e Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 13 May 2011 18:07:51 +0200
Subject: [PATCH 118/830] vsrc_buffer: fix style

---
 libavfilter/vsrc_buffer.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/libavfilter/vsrc_buffer.c b/libavfilter/vsrc_buffer.c
index b593a207de..b41007c0da 100644
--- a/libavfilter/vsrc_buffer.c
+++ b/libavfilter/vsrc_buffer.c
@@ -52,12 +52,12 @@ int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilte
         //return -1;
     }
 
-    if(!c->sws_param[0]){
+    if (!c->sws_param[0]) {
         snprintf(c->sws_param, 255, "%d:%d:%s", c->w, c->h, sws_param);
     }
 
     if (picref->video->w != c->w || picref->video->h != c->h || picref->format != c->pix_fmt) {
-        AVFilterContext *scale= buffer_filter->outputs[0]->dst;
+        AVFilterContext *scale = buffer_filter->outputs[0]->dst;
         AVFilterLink *link;
 
         av_log(buffer_filter, AV_LOG_INFO,
@@ -65,26 +65,26 @@ int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilte
                c->w, c->h, av_pix_fmt_descriptors[c->pix_fmt].name,
                picref->video->w, picref->video->h, av_pix_fmt_descriptors[picref->format].name);
 
-        if(!scale || strcmp(scale->filter->name,"scale")){
-            AVFilter *f= avfilter_get_by_name("scale");
+        if (!scale || strcmp(scale->filter->name, "scale")) {
+            AVFilter *f = avfilter_get_by_name("scale");
 
             av_log(buffer_filter, AV_LOG_INFO, "Inserting scaler filter\n");
-            if(avfilter_open(&scale, f, "Input equalizer") < 0)
+            if (avfilter_open(&scale, f, "Input equalizer") < 0)
                 return -1;
 
-            if((ret=avfilter_init_filter(scale, c->sws_param, NULL))<0){
+            if ((ret = avfilter_init_filter(scale, c->sws_param, NULL)) < 0) {
                 avfilter_free(scale);
                 return ret;
             }
 
-            if((ret=avfilter_insert_filter(buffer_filter->outputs[0], scale, 0, 0))<0){
+            if ((ret = avfilter_insert_filter(buffer_filter->outputs[0], scale, 0, 0)) < 0) {
                 avfilter_free(scale);
                 return ret;
             }
             scale->outputs[0]->time_base = scale->inputs[0]->time_base;
 
             scale->outputs[0]->format= c->pix_fmt;
-        } else if(!strcmp(scale->filter->name, "scale")) {
+        } else if (!strcmp(scale->filter->name, "scale")) {
             snprintf(c->sws_param, 255, "%d:%d:%s", scale->outputs[0]->w, scale->outputs[0]->h, sws_param);
             scale->filter->init(scale, c->sws_param, NULL);
         }
@@ -93,7 +93,7 @@ int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilte
         c->w       = scale->inputs[0]->w      = picref->video->w;
         c->h       = scale->inputs[0]->h      = picref->video->h;
 
-        link= scale->outputs[0];
+        link = scale->outputs[0];
         if ((ret =  link->srcpad->config_props(link)) < 0)
             return ret;
     }

From 509b32cf5d5656473e277ac43dbb2ce9da66bff2 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 13 May 2011 18:09:47 +0200
Subject: [PATCH 119/830] vsrc_buffer: propagate avfilter_open() error code

---
 libavfilter/vsrc_buffer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vsrc_buffer.c b/libavfilter/vsrc_buffer.c
index b41007c0da..effc232d5e 100644
--- a/libavfilter/vsrc_buffer.c
+++ b/libavfilter/vsrc_buffer.c
@@ -69,8 +69,8 @@ int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilte
             AVFilter *f = avfilter_get_by_name("scale");
 
             av_log(buffer_filter, AV_LOG_INFO, "Inserting scaler filter\n");
-            if (avfilter_open(&scale, f, "Input equalizer") < 0)
-                return -1;
+            if ((ret = avfilter_open(&scale, f, "Input equalizer")) < 0)
+                return ret;
 
             if ((ret = avfilter_init_filter(scale, c->sws_param, NULL)) < 0) {
                 avfilter_free(scale);

From 50764e19a8edc018b6e5276f1b3e4215ba66217f Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 01:02:54 +0200
Subject: [PATCH 120/830] vsrc_buffer: make the source accept sws_param in init

Avoid the need for two distinct av_vsrc_buffer_add_video_buffer_ref*
functions. Simplify the interface.
---
 doc/filters.texi          | 10 ++++++++--
 ffmpeg.c                  |  2 +-
 libavfilter/avfilter.h    |  2 +-
 libavfilter/vsrc_buffer.c | 33 ++++++++++++++-------------------
 libavfilter/vsrc_buffer.h |  3 ---
 5 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 523e279d46..1407828cbe 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -1536,9 +1536,10 @@ This source is mainly intended for a programmatic use, in particular
 through the interface defined in @file{libavfilter/vsrc_buffer.h}.
 
 It accepts the following parameters:
-@var{width}:@var{height}:@var{pix_fmt_string}:@var{timebase_num}:@var{timebase_den}:@var{sample_aspect_ratio_num}:@var{sample_aspect_ratio.den}
+@var{width}:@var{height}:@var{pix_fmt_string}:@var{timebase_num}:@var{timebase_den}:@var{sample_aspect_ratio_num}:@var{sample_aspect_ratio.den}:@var{scale_params}
 
-All the parameters need to be explicitely defined.
+All the parameters but @var{scale_params} need to be explicitely
+defined.
 
 Follows the list of the accepted parameters.
 
@@ -1559,6 +1560,11 @@ timestamps of the buffered frames.
 @item sample_aspect_ratio.num, sample_aspect_ratio.den
 Specify numerator and denominator of the sample aspect ratio assumed
 by the video frames.
+
+@item scale_params
+Specify the optional parameters to be used for the scale filter which
+is automatically inserted when an input change is detected in the
+input size or format.
 @end table
 
 For example:
diff --git a/ffmpeg.c b/ffmpeg.c
index d4c7705a09..182f92945b 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1652,7 +1652,7 @@ static int output_packet(AVInputStream *ist, int ist_index,
 
                     picref =
                         avfilter_get_video_buffer_ref_from_frame(&picture, AV_PERM_WRITE);
-                    av_vsrc_buffer_add_video_buffer_ref(ost->input_video_filter, picref, ""); //TODO user setable params
+                    av_vsrc_buffer_add_video_buffer_ref(ost->input_video_filter, picref);
                     picref->buf->data[0] = NULL;
                     avfilter_unref_buffer(picref);
                 }
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 171c9e4c74..a0ad35882f 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR  6
+#define LIBAVFILTER_VERSION_MINOR  7
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vsrc_buffer.c b/libavfilter/vsrc_buffer.c
index effc232d5e..9815f945da 100644
--- a/libavfilter/vsrc_buffer.c
+++ b/libavfilter/vsrc_buffer.c
@@ -37,8 +37,7 @@ typedef struct {
     char              sws_param[256];
 } BufferSourceContext;
 
-int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilterBufferRef *picref,
-                                         const char *sws_param)
+int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, AVFilterBufferRef *picref)
 {
     BufferSourceContext *c = buffer_filter->priv;
     AVFilterLink *outlink = buffer_filter->outputs[0];
@@ -52,13 +51,10 @@ int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilte
         //return -1;
     }
 
-    if (!c->sws_param[0]) {
-        snprintf(c->sws_param, 255, "%d:%d:%s", c->w, c->h, sws_param);
-    }
-
     if (picref->video->w != c->w || picref->video->h != c->h || picref->format != c->pix_fmt) {
         AVFilterContext *scale = buffer_filter->outputs[0]->dst;
         AVFilterLink *link;
+        char scale_param[1024];
 
         av_log(buffer_filter, AV_LOG_INFO,
                "Buffer video input changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s\n",
@@ -72,7 +68,8 @@ int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilte
             if ((ret = avfilter_open(&scale, f, "Input equalizer")) < 0)
                 return ret;
 
-            if ((ret = avfilter_init_filter(scale, c->sws_param, NULL)) < 0) {
+            snprintf(scale_param, sizeof(scale_param)-1, "%d:%d:%s", c->w, c->h, c->sws_param);
+            if ((ret = avfilter_init_filter(scale, scale_param, NULL)) < 0) {
                 avfilter_free(scale);
                 return ret;
             }
@@ -85,8 +82,9 @@ int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilte
 
             scale->outputs[0]->format= c->pix_fmt;
         } else if (!strcmp(scale->filter->name, "scale")) {
-            snprintf(c->sws_param, 255, "%d:%d:%s", scale->outputs[0]->w, scale->outputs[0]->h, sws_param);
-            scale->filter->init(scale, c->sws_param, NULL);
+            snprintf(scale_param, sizeof(scale_param)-1, "%d:%d:%s",
+                     scale->outputs[0]->w, scale->outputs[0]->h, c->sws_param);
+            scale->filter->init(scale, scale_param, NULL);
         }
 
         c->pix_fmt = scale->inputs[0]->format = picref->format;
@@ -108,24 +106,21 @@ int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilte
     return 0;
 }
 
-int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, AVFilterBufferRef *picref)
-{
-    return av_vsrc_buffer_add_video_buffer_ref2(buffer_filter, picref, "");
-}
-
 static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
 {
     BufferSourceContext *c = ctx->priv;
     char pix_fmt_str[128];
     int n = 0;
+    *c->sws_param = 0;
 
     if (!args ||
-        (n = sscanf(args, "%d:%d:%127[^:]:%d:%d:%d:%d", &c->w, &c->h, pix_fmt_str,
+        (n = sscanf(args, "%d:%d:%127[^:]:%d:%d:%d:%d:%255c", &c->w, &c->h, pix_fmt_str,
                     &c->time_base.num, &c->time_base.den,
-                    &c->sample_aspect_ratio.num, &c->sample_aspect_ratio.den)) != 7) {
-        av_log(ctx, AV_LOG_ERROR, "Expected 7 arguments, but only %d found in '%s'\n", n, args);
+                    &c->sample_aspect_ratio.num, &c->sample_aspect_ratio.den, c->sws_param)) < 7) {
+        av_log(ctx, AV_LOG_ERROR, "Expected at least 7 arguments, but only %d found in '%s'\n", n, args);
         return AVERROR(EINVAL);
     }
+
     if ((c->pix_fmt = av_get_pix_fmt(pix_fmt_str)) == PIX_FMT_NONE) {
         char *tail;
         c->pix_fmt = strtol(pix_fmt_str, &tail, 10);
@@ -135,10 +130,10 @@ static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
         }
     }
 
-    av_log(ctx, AV_LOG_INFO, "w:%d h:%d pixfmt:%s tb:%d/%d sar:%d/%d\n",
+    av_log(ctx, AV_LOG_INFO, "w:%d h:%d pixfmt:%s tb:%d/%d sar:%d/%d sws_param:%s\n",
            c->w, c->h, av_pix_fmt_descriptors[c->pix_fmt].name,
            c->time_base.num, c->time_base.den,
-           c->sample_aspect_ratio.num, c->sample_aspect_ratio.den);
+           c->sample_aspect_ratio.num, c->sample_aspect_ratio.den, c->sws_param);
     return 0;
 }
 
diff --git a/libavfilter/vsrc_buffer.h b/libavfilter/vsrc_buffer.h
index eb9ec56edd..34fec0e61a 100644
--- a/libavfilter/vsrc_buffer.h
+++ b/libavfilter/vsrc_buffer.h
@@ -31,7 +31,4 @@
 
 int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, AVFilterBufferRef *picref);
 
-int av_vsrc_buffer_add_video_buffer_ref2(AVFilterContext *buffer_filter, AVFilterBufferRef *picref,
-                                         const char *sws_param);
-
 #endif /* AVFILTER_VSRC_BUFFER_H */

From d3fddb8454cab525cbcb6cc9c3d0ca29b6c78cdd Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 22:51:28 +0200
Subject: [PATCH 121/830] vsrc_buffer: fix example in docs, add mandatory
 parameters

---
 doc/filters.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 1407828cbe..31a963d058 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -1579,7 +1579,7 @@ Since the pixel format with name "yuv410p" corresponds to the number 6
 (check the enum PixelFormat definition in @file{libavutil/pixfmt.h}),
 this example corresponds to:
 @example
-buffer=320:240:6:1:24
+buffer=320:240:6:1:24:1:1
 @end example
 
 @section color

From c000a9f78390b71812c7ee5187bbccc3c2d79b1e Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 01:17:16 +0200
Subject: [PATCH 122/830] vsrc_buffer: add av_vsrc_buffer_add_frame()

The new function is a wrapper around
av_vsrc_buffer_add_video_buffer_ref(), and simplifies the act
of pushing AVFrame data to the source buffer.
---
 doc/APIchanges            |  3 +++
 ffmpeg.c                  |  7 +------
 libavfilter/avcodec.h     |  9 +++++++++
 libavfilter/avfilter.h    |  2 +-
 libavfilter/vsrc_buffer.c | 17 +++++++++++++++++
 5 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index cd1f88894d..710967c809 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-05-XX - XXXXXX - lavfi 2.8.0 - avcodec.h
+  Add av_vsrc_buffer_add_frame() to libavfilter/avcodec.h.
+
 2011-05-XX - XXXXXX - lavfi 2.6.0 - avcodec.h
   Add avfilter_get_video_buffer_ref_from_frame() to libavfilter/avcodec.h.
 
diff --git a/ffmpeg.c b/ffmpeg.c
index 182f92945b..2b19c7f21b 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1643,18 +1643,13 @@ static int output_packet(AVInputStream *ist, int ist_index,
 #if CONFIG_AVFILTER
         if(ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
             for(i=0;i<nb_ostreams;i++) {
-                AVFilterBufferRef *picref;
                 ost = ost_table[i];
                 if (ost->input_video_filter && ost->source_index == ist_index) {
                     if (!picture.sample_aspect_ratio.num)
                         picture.sample_aspect_ratio = ist->st->sample_aspect_ratio;
                     picture.pts = ist->pts;
 
-                    picref =
-                        avfilter_get_video_buffer_ref_from_frame(&picture, AV_PERM_WRITE);
-                    av_vsrc_buffer_add_video_buffer_ref(ost->input_video_filter, picref);
-                    picref->buf->data[0] = NULL;
-                    avfilter_unref_buffer(picref);
+                    av_vsrc_buffer_add_frame(ost->input_video_filter, &picture);
                 }
             }
         }
diff --git a/libavfilter/avcodec.h b/libavfilter/avcodec.h
index e6ae907391..74434e819d 100644
--- a/libavfilter/avcodec.h
+++ b/libavfilter/avcodec.h
@@ -45,4 +45,13 @@ void avfilter_copy_frame_props(AVFilterBufferRef *dst, const AVFrame *src);
  */
 AVFilterBufferRef *avfilter_get_video_buffer_ref_from_frame(const AVFrame *frame, int perms);
 
+/**
+ * Add frame data to buffer_src.
+ *
+ * @param buffer_src pointer to a buffer source context
+ * @return >= 0 in case of success, a negative AVERROR code in case of
+ * failure
+ */
+int av_vsrc_buffer_add_frame(AVFilterContext *buffer_src, const AVFrame *frame);
+
 #endif /* AVFILTER_AVCODEC_H */
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index a0ad35882f..02f2ed2f77 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR  7
+#define LIBAVFILTER_VERSION_MINOR  8
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vsrc_buffer.c b/libavfilter/vsrc_buffer.c
index 9815f945da..6a2fcbf36e 100644
--- a/libavfilter/vsrc_buffer.c
+++ b/libavfilter/vsrc_buffer.c
@@ -106,6 +106,23 @@ int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, AVFilter
     return 0;
 }
 
+#if CONFIG_AVCODEC
+#include "avcodec.h"
+
+int av_vsrc_buffer_add_frame(AVFilterContext *buffer_src, const AVFrame *frame)
+{
+    AVFilterBufferRef *picref =
+        avfilter_get_video_buffer_ref_from_frame(frame, AV_PERM_WRITE);
+    if (!picref)
+        return AVERROR(ENOMEM);
+    av_vsrc_buffer_add_video_buffer_ref(buffer_src, picref);
+    picref->buf->data[0] = NULL;
+    avfilter_unref_buffer(picref);
+
+    return 0;
+}
+#endif
+
 static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
 {
     BufferSourceContext *c = ctx->priv;

From c78a85adf4a153914233e02b4d44f9414bc579d7 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 12:01:25 +0200
Subject: [PATCH 123/830] vsrc_buffer: document
 av_vsrc_buffer_add_video_buffer_ref()

---
 libavfilter/vsrc_buffer.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavfilter/vsrc_buffer.h b/libavfilter/vsrc_buffer.h
index 34fec0e61a..5307eadb70 100644
--- a/libavfilter/vsrc_buffer.h
+++ b/libavfilter/vsrc_buffer.h
@@ -29,6 +29,13 @@
 
 #include "avfilter.h"
 
-int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, AVFilterBufferRef *picref);
+/**
+ * Add video buffer data in picref to buffer_src.
+ *
+ * @param buffer_src pointer to a buffer source context
+ * @return >= 0 in case of success, a negative AVERROR code in case of
+ * failure
+ */
+int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_src, AVFilterBufferRef *picref);
 
 #endif /* AVFILTER_VSRC_BUFFER_H */

From abe936388940e5d54057c74ba0afc3d228eccdc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 18 May 2011 16:21:46 +0300
Subject: [PATCH 124/830] sdp: Allow passing AVFormatContext flags to the SDP
 generation

---
 libavformat/internal.h |  3 ++-
 libavformat/movenc.c   |  2 +-
 libavformat/sdp.c      | 11 ++++++-----
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/libavformat/internal.h b/libavformat/internal.h
index b8b520d0e2..0f121a0d5e 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -116,10 +116,11 @@ int ff_url_join(char *str, int size, const char *proto,
  * @param dest_type the destination address type, may be NULL
  * @param port the destination port of the media stream, 0 if unknown
  * @param ttl the time to live of the stream, 0 if not multicast
+ * @param flags the AVFormatContext->flags, modifying the generated SDP
  */
 void ff_sdp_write_media(char *buff, int size, AVCodecContext *c,
                         const char *dest_addr, const char *dest_type,
-                        int port, int ttl);
+                        int port, int ttl, int flags);
 
 /**
  * Write a packet to another muxer than the one the user originally
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 1b61706da3..7bdffcc722 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1324,7 +1324,7 @@ static int mov_write_udta_sdp(AVIOContext *pb, AVCodecContext *ctx, int index)
     char buf[1000] = "";
     int len;
 
-    ff_sdp_write_media(buf, sizeof(buf), ctx, NULL, NULL, 0, 0);
+    ff_sdp_write_media(buf, sizeof(buf), ctx, NULL, NULL, 0, 0, 0);
     av_strlcatf(buf, sizeof(buf), "a=control:streamid=%d\r\n", index);
     len = strlen(buf);
 
diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index 923c57fbcd..f7aec1b766 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -299,7 +299,7 @@ xiph_fail:
     return NULL;
 }
 
-static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, int payload_type)
+static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, int payload_type, int flags)
 {
     char *config = NULL;
 
@@ -448,7 +448,7 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c,
     return buff;
 }
 
-void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl)
+void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl, int flags)
 {
     const char *type;
     int payload_type;
@@ -471,7 +471,7 @@ void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *des
         av_strlcatf(buff, size, "b=AS:%d\r\n", c->bit_rate / 1000);
     }
 
-    sdp_write_media_attributes(buff, size, c, payload_type);
+    sdp_write_media_attributes(buff, size, c, payload_type, flags);
 }
 
 int av_sdp_create(AVFormatContext *ac[], int n_files, char *buf, int size)
@@ -520,7 +520,8 @@ int av_sdp_create(AVFormatContext *ac[], int n_files, char *buf, int size)
         for (j = 0; j < ac[i]->nb_streams; j++) {
             ff_sdp_write_media(buf, size,
                                   ac[i]->streams[j]->codec, dst[0] ? dst : NULL,
-                                  dst_type, (port > 0) ? port + j * 2 : 0, ttl);
+                                  dst_type, (port > 0) ? port + j * 2 : 0, ttl,
+                                  ac[i]->flags);
             if (port <= 0) {
                 av_strlcatf(buf, size,
                                    "a=control:streamid=%d\r\n", i + j);
@@ -536,7 +537,7 @@ int av_sdp_create(AVFormatContext *ac[], int n_files, char *buf, int size)
     return AVERROR(ENOSYS);
 }
 
-void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl)
+void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl, int flags)
 {
 }
 #endif

From bd61b2a1cac5fcaa9970dffe3b28c52774ea2f09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 18 May 2011 16:21:47 +0300
Subject: [PATCH 125/830] movenc: Pass AVFormatContext flags to the SDP
 generation

---
 libavformat/movenc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 7bdffcc722..a4ec51d082 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1319,12 +1319,12 @@ static int mov_write_uuid_tag_psp(AVIOContext *pb, MOVTrack *mov)
     return 0x34;
 }
 
-static int mov_write_udta_sdp(AVIOContext *pb, AVCodecContext *ctx, int index)
+static int mov_write_udta_sdp(AVIOContext *pb, AVFormatContext *ctx, int index)
 {
     char buf[1000] = "";
     int len;
 
-    ff_sdp_write_media(buf, sizeof(buf), ctx, NULL, NULL, 0, 0, 0);
+    ff_sdp_write_media(buf, sizeof(buf), ctx->streams[0]->codec, NULL, NULL, 0, 0, ctx->flags);
     av_strlcatf(buf, sizeof(buf), "a=control:streamid=%d\r\n", index);
     len = strlen(buf);
 
@@ -1352,7 +1352,7 @@ static int mov_write_trak_tag(AVIOContext *pb, MOVTrack *track, AVStream *st)
     if (track->mode == MODE_PSP)
         mov_write_uuid_tag_psp(pb,track);  // PSP Movies require this uuid box
     if (track->tag == MKTAG('r','t','p',' '))
-        mov_write_udta_sdp(pb, track->rtp_ctx->streams[0]->codec, track->trackID);
+        mov_write_udta_sdp(pb, track->rtp_ctx, track->trackID);
     if (track->enc->codec_type == AVMEDIA_TYPE_VIDEO && track->mode == MODE_MOV) {
         double sample_aspect_ratio = av_q2d(st->sample_aspect_ratio);
         if (0.0 != sample_aspect_ratio && 1.0 != sample_aspect_ratio)

From ef409645f06368bcdcedd1b7fe19e25699ae5082 Mon Sep 17 00:00:00 2001
From: Juan Carlos Rodriguez <ing.juancarlosrodriguez@hotmail.com>
Date: Wed, 18 May 2011 16:21:48 +0300
Subject: [PATCH 126/830] rtpenc: MP4A-LATM payload support

---
 libavformat/Makefile      |  1 +
 libavformat/avformat.h    |  1 +
 libavformat/options.c     |  1 +
 libavformat/rtpenc.c      |  5 ++-
 libavformat/rtpenc.h      |  1 +
 libavformat/rtpenc_latm.c | 60 +++++++++++++++++++++++++++++++
 libavformat/sdp.c         | 74 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 libavformat/rtpenc_latm.c

diff --git a/libavformat/Makefile b/libavformat/Makefile
index 13fe2371bf..55f6152f8d 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -233,6 +233,7 @@ OBJS-$(CONFIG_RSO_MUXER)                 += rsoenc.o rso.o
 OBJS-$(CONFIG_RPL_DEMUXER)               += rpl.o
 OBJS-$(CONFIG_RTP_MUXER)                 += rtp.o         \
                                             rtpenc_aac.o     \
+                                            rtpenc_latm.o    \
                                             rtpenc_amr.o     \
                                             rtpenc_h263.o    \
                                             rtpenc_mpv.o     \
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index ec51a57ca8..f9091f0afd 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -729,6 +729,7 @@ typedef struct AVFormatContext {
 #define AVFMT_FLAG_NOFILLIN     0x0010 ///< Do not infer any values from other values, just return what is stored in the container
 #define AVFMT_FLAG_NOPARSE      0x0020 ///< Do not use AVParsers, you also must set AVFMT_FLAG_NOFILLIN as the fillin code works on frames and no parsing -> no frames. Also seeking to frames can not work if parsing to find frame boundaries has been disabled
 #define AVFMT_FLAG_RTP_HINT     0x0040 ///< Add RTP hinting to the output file
+#define AVFMT_FLAG_MP4A_LATM    0x0080 ///< Enable RTP MP4A-LATM payload
 #define AVFMT_FLAG_SORT_DTS    0x10000 ///< try to interleave outputted packets by dts (using this flag can slow demuxing down)
 #define AVFMT_FLAG_PRIV_OPT    0x20000 ///< Enable use of private options by delaying codec open (this could be made default once all code is converted)
 
diff --git a/libavformat/options.c b/libavformat/options.c
index 40fd49ff8b..82be8487eb 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -51,6 +51,7 @@ static const AVOption options[]={
 {"igndts", "ignore dts", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_IGNDTS }, INT_MIN, INT_MAX, D, "fflags"},
 {"rtphint", "add rtp hinting", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_RTP_HINT }, INT_MIN, INT_MAX, E, "fflags"},
 {"sortdts", "try to interleave outputted packets by dts", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_SORT_DTS }, INT_MIN, INT_MAX, D, "fflags"},
+{"latm", "enable RTP MP4A-LATM payload", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_MP4A_LATM }, INT_MIN, INT_MAX, E, "fflags"},
 {"analyzeduration", "how many microseconds are analyzed to estimate duration", OFFSET(max_analyze_duration), FF_OPT_TYPE_INT, {.dbl = 5*AV_TIME_BASE }, 0, INT_MAX, D},
 {"cryptokey", "decryption key", OFFSET(key), FF_OPT_TYPE_BINARY, {.dbl = 0}, 0, 0, D},
 {"indexmem", "max memory used for timestamp index (per stream)", OFFSET(max_index_size), FF_OPT_TYPE_INT, {.dbl = 1<<20 }, 0, INT_MAX, D},
diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
index 71ccdabf4a..7b2e78e88e 100644
--- a/libavformat/rtpenc.c
+++ b/libavformat/rtpenc.c
@@ -404,7 +404,10 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt)
         ff_rtp_send_mpegvideo(s1, pkt->data, size);
         break;
     case CODEC_ID_AAC:
-        ff_rtp_send_aac(s1, pkt->data, size);
+        if (s1->flags & AVFMT_FLAG_MP4A_LATM)
+            ff_rtp_send_latm(s1, pkt->data, size);
+        else
+            ff_rtp_send_aac(s1, pkt->data, size);
         break;
     case CODEC_ID_AMR_NB:
     case CODEC_ID_AMR_WB:
diff --git a/libavformat/rtpenc.h b/libavformat/rtpenc.h
index b9663c55b0..d65214aeb0 100644
--- a/libavformat/rtpenc.h
+++ b/libavformat/rtpenc.h
@@ -65,6 +65,7 @@ void ff_rtp_send_data(AVFormatContext *s1, const uint8_t *buf1, int len, int m);
 void ff_rtp_send_h264(AVFormatContext *s1, const uint8_t *buf1, int size);
 void ff_rtp_send_h263(AVFormatContext *s1, const uint8_t *buf1, int size);
 void ff_rtp_send_aac(AVFormatContext *s1, const uint8_t *buff, int size);
+void ff_rtp_send_latm(AVFormatContext *s1, const uint8_t *buff, int size);
 void ff_rtp_send_amr(AVFormatContext *s1, const uint8_t *buff, int size);
 void ff_rtp_send_mpegvideo(AVFormatContext *s1, const uint8_t *buf1, int size);
 void ff_rtp_send_xiph(AVFormatContext *s1, const uint8_t *buff, int size);
diff --git a/libavformat/rtpenc_latm.c b/libavformat/rtpenc_latm.c
new file mode 100644
index 0000000000..501fa5d5d5
--- /dev/null
+++ b/libavformat/rtpenc_latm.c
@@ -0,0 +1,60 @@
+/*
+ * RTP Packetization of MPEG-4 Audio (RFC 3016)
+ * Copyright (c) 2011 Juan Carlos Rodriguez <ing.juancarlosrodriguez@hotmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "rtpenc.h"
+
+void ff_rtp_send_latm(AVFormatContext *s1, const uint8_t *buff, int size) {
+    /* MP4A-LATM
+     * The RTP payload format specification is described in RFC 3016
+     * The encoding specifications are provided in ISO/IEC 14496-3 */
+
+    RTPMuxContext *s = s1->priv_data;
+    int header_size;
+    int offset = 0;
+    int len    = 0;
+
+    /* skip ADTS header, if present */
+    if ((s1->streams[0]->codec->extradata_size) == 0) {
+        size -= 7;
+        buff += 7;
+    }
+
+    /* PayloadLengthInfo() */
+    header_size = size/0xFF + 1;
+    memset(s->buf, 0xFF, header_size - 1);
+    s->buf[header_size - 1] = size % 0xFF;
+
+    s->timestamp = s->cur_timestamp;
+
+    /* PayloadMux() */
+    while (size > 0) {
+        len   = FFMIN(size, s->max_payload_size - (!offset ? header_size : 0));
+        size -= len;
+        if (!offset) {
+            memcpy(s->buf + header_size, buff, len);
+            ff_rtp_send_data(s1, s->buf, header_size + len, !size);
+        } else {
+            ff_rtp_send_data(s1, buff + offset, len, !size);
+        }
+        offset += len;
+    }
+}
diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index f7aec1b766..c62e00d775 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -23,6 +23,7 @@
 #include "libavutil/base64.h"
 #include "libavutil/parseutils.h"
 #include "libavcodec/xiph.h"
+#include "libavcodec/mpeg4audio.h"
 #include "avformat.h"
 #include "internal.h"
 #include "avc.h"
@@ -299,6 +300,69 @@ xiph_fail:
     return NULL;
 }
 
+static int latm_context2profilelevel(AVCodecContext *c) {
+    /* MP4A-LATM
+     * The RTP payload format specification is described in RFC 3016
+     * The encoding specifications are provided in ISO/IEC 14496-3 */
+
+    int profile_level = 0x2B;
+
+    /* TODO: AAC Profile only supports AAC LC Object Type.
+     * Different Object Types should implement different Profile Levels */
+
+    if (c->sample_rate <= 24000) {
+        if (c->channels <= 2)
+            profile_level = 0x28; // AAC Profile, Level 1
+    } else if (c->sample_rate <= 48000) {
+        if (c->channels <= 2) {
+            profile_level = 0x29; // AAC Profile, Level 2
+        } else if (c->channels <= 5) {
+            profile_level = 0x2A; // AAC Profile, Level 4
+        }
+    } else if (c->sample_rate <= 96000) {
+        if (c->channels <= 5) {
+            profile_level = 0x2B; // AAC Profile, Level 5
+        }
+    }
+
+    return profile_level;
+}
+
+static char *latm_context2config(AVCodecContext *c) {
+    /* MP4A-LATM
+     * The RTP payload format specification is described in RFC 3016
+     * The encoding specifications are provided in ISO/IEC 14496-3 */
+
+    uint8_t config_byte[6];
+    int rate_index;
+    char *config;
+
+    for (rate_index = 0; rate_index < 16; rate_index++)
+        if (ff_mpeg4audio_sample_rates[rate_index] == c->sample_rate)
+            break;
+    if (rate_index == 16) {
+        av_log(c, AV_LOG_ERROR, "Unsupported sample rate\n");
+        return NULL;
+    }
+
+    config_byte[0] = 0x40;
+    config_byte[1] = 0;
+    config_byte[2] = 0x20 | rate_index;
+    config_byte[3] = c->channels << 4;
+    config_byte[4] = 0x3f;
+    config_byte[5] = 0xc0;
+
+    config = av_malloc(6*2+1);
+    if (!config) {
+        av_log(c, AV_LOG_ERROR, "Cannot allocate memory for the config info.\n");
+        return NULL;
+    }
+    ff_data_to_hex(config, config_byte, 6, 1);
+    config[12] = 0;
+
+    return config;
+}
+
 static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, int payload_type, int flags)
 {
     char *config = NULL;
@@ -334,6 +398,15 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c,
                                      payload_type, config ? config : "");
             break;
         case CODEC_ID_AAC:
+            if (flags & AVFMT_FLAG_MP4A_LATM) {
+                config = latm_context2config(c);
+                if (!config)
+                    return NULL;
+                av_strlcatf(buff, size, "a=rtpmap:%d MP4A-LATM/%d/%d\r\n"
+                                        "a=fmtp:%d profile-level-id=%d;cpresent=0;config=%s\r\n",
+                                         payload_type, c->sample_rate, c->channels,
+                                         payload_type, latm_context2profilelevel(c), config);
+            } else {
             if (c->extradata_size) {
                 config = extradata2config(c);
             } else {
@@ -352,6 +425,7 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c,
                                     "indexdeltalength=3%s\r\n",
                                      payload_type, c->sample_rate, c->channels,
                                      payload_type, config);
+            }
             break;
         case CODEC_ID_PCM_S16BE:
             if (payload_type >= RTP_PT_PRIVATE)

From 65a4d8e5a78800a4f493fa21c4bb329d31df96ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 18 May 2011 16:21:49 +0300
Subject: [PATCH 127/830] rtpenc_chain: Pass the MP4A_LATM flag to chained
 muxers

---
 libavformat/rtpenc_chain.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/rtpenc_chain.c b/libavformat/rtpenc_chain.c
index bd3d1a875d..faaff097d7 100644
--- a/libavformat/rtpenc_chain.c
+++ b/libavformat/rtpenc_chain.c
@@ -48,6 +48,7 @@ AVFormatContext *ff_rtp_chain_mux_open(AVFormatContext *s, AVStream *st,
     rtpctx->max_delay = s->max_delay;
     /* Copy other stream parameters. */
     rtpctx->streams[0]->sample_aspect_ratio = st->sample_aspect_ratio;
+    rtpctx->flags |= s->flags & AVFMT_FLAG_MP4A_LATM;
 
     /* Set the synchronized start time. */
     rtpctx->start_time_realtime = s->start_time_realtime;

From 6efb29686fc9a7f76480405df8fe7eaa7a9dd4cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 18 May 2011 16:21:50 +0300
Subject: [PATCH 128/830] Reindent

---
 libavformat/sdp.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index c62e00d775..409948ebbb 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -407,24 +407,24 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c,
                                          payload_type, c->sample_rate, c->channels,
                                          payload_type, latm_context2profilelevel(c), config);
             } else {
-            if (c->extradata_size) {
-                config = extradata2config(c);
-            } else {
-                /* FIXME: maybe we can forge config information based on the
-                 *        codec parameters...
-                 */
-                av_log(c, AV_LOG_ERROR, "AAC with no global headers is currently not supported.\n");
-                return NULL;
-            }
-            if (config == NULL) {
-                return NULL;
-            }
-            av_strlcatf(buff, size, "a=rtpmap:%d MPEG4-GENERIC/%d/%d\r\n"
-                                    "a=fmtp:%d profile-level-id=1;"
-                                    "mode=AAC-hbr;sizelength=13;indexlength=3;"
-                                    "indexdeltalength=3%s\r\n",
-                                     payload_type, c->sample_rate, c->channels,
-                                     payload_type, config);
+                if (c->extradata_size) {
+                    config = extradata2config(c);
+                } else {
+                    /* FIXME: maybe we can forge config information based on the
+                     *        codec parameters...
+                     */
+                    av_log(c, AV_LOG_ERROR, "AAC with no global headers is currently not supported.\n");
+                    return NULL;
+                }
+                if (config == NULL) {
+                    return NULL;
+                }
+                av_strlcatf(buff, size, "a=rtpmap:%d MPEG4-GENERIC/%d/%d\r\n"
+                                        "a=fmtp:%d profile-level-id=1;"
+                                        "mode=AAC-hbr;sizelength=13;indexlength=3;"
+                                        "indexdeltalength=3%s\r\n",
+                                         payload_type, c->sample_rate, c->channels,
+                                         payload_type, config);
             }
             break;
         case CODEC_ID_PCM_S16BE:

From 89a20987355757be64c49fb714721c38902ac1cc Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 20:46:24 +0100
Subject: [PATCH 129/830] Fix ff_mpa_synth_filter_fixed() prototype

The prototype should use the same typedefs as the definition, or it
will fail where int32_t is not int (DOS apparently).

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudiodsp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h
index 7b05b68eee..597e2533f5 100644
--- a/libavcodec/mpegaudiodsp.h
+++ b/libavcodec/mpegaudiodsp.h
@@ -39,7 +39,7 @@ void ff_mpa_synth_filter_fixed(MPADSPContext *s,
                                int32_t *synth_buf_ptr, int *synth_buf_offset,
                                int32_t *window, int *dither_state,
                                int16_t *samples, int incr,
-                               int *sb_samples);
+                               int32_t *sb_samples);
 
 void ff_mpa_synth_filter_float(MPADSPContext *s,
                                float *synth_buf_ptr, int *synth_buf_offset,

From 0ffc84150599d15b66a3960202e07755f99fe0d0 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 18 May 2011 08:10:49 -0400
Subject: [PATCH 130/830] h264: fix loopfilter with threading at slice
 boundaries.

---
 libavcodec/h264.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 6b262bc992..5338146499 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2557,7 +2557,7 @@ static int fill_filter_caches(H264Context *h, int mb_type){
     return 0;
 }
 
-static void loop_filter(H264Context *h){
+static void loop_filter(H264Context *h, int start_x, int end_x){
     MpegEncContext * const s = &h->s;
     uint8_t  *dest_y, *dest_cb, *dest_cr;
     int linesize, uvlinesize, mb_x, mb_y;
@@ -2566,7 +2566,7 @@ static void loop_filter(H264Context *h){
     const int pixel_shift = h->pixel_shift;
 
     if(h->deblocking_filter) {
-        for(mb_x= 0; mb_x<s->mb_width; mb_x++){
+        for(mb_x= start_x; mb_x<end_x; mb_x++){
             for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                 int mb_xy, mb_type;
                 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
@@ -2632,6 +2632,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
     H264Context *h = *(void**)arg;
     MpegEncContext * const s = &h->s;
     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
+    int lf_x_start = s->mb_x;
 
     s->mb_skip_run= -1;
 
@@ -2670,6 +2671,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
 
             if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
                 return 0;
             }
             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
@@ -2679,8 +2681,8 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
             }
 
             if( ++s->mb_x >= s->mb_width ) {
-                s->mb_x = 0;
-                loop_filter(h);
+                loop_filter(h, lf_x_start, s->mb_x);
+                s->mb_x = lf_x_start = 0;
                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
                 ++s->mb_y;
                 if(FIELD_OR_MBAFF_PICTURE) {
@@ -2693,6 +2695,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
             if( eos || s->mb_y >= s->mb_height ) {
                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
                 return 0;
             }
         }
@@ -2714,13 +2717,12 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
             if(ret<0){
                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
-
                 return -1;
             }
 
             if(++s->mb_x >= s->mb_width){
-                s->mb_x=0;
-                loop_filter(h);
+                loop_filter(h, lf_x_start, s->mb_x);
+                s->mb_x = lf_x_start = 0;
                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
                 ++s->mb_y;
                 if(FIELD_OR_MBAFF_PICTURE) {
@@ -2747,6 +2749,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                    if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
 
                     return 0;
                 }else{

From 4e987f8282ff7658a6f804b9db39954bb59fa72e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 19 May 2011 16:47:59 -0400
Subject: [PATCH 131/830] h264: correct border check.

When backing up the top-left border, check that the top-left
(rather than left) MB indeed does belong to our slice. If it
doesn't, backing up has no positive effect but may accidentally
interfere with other threads writing in the same space.

Fixes occasional one-off effects when enabling slice-MT.
---
 libavcodec/h264.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 5338146499..5fb303c82f 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1034,7 +1034,7 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                   int linesize, int uvlinesize,
                                   int xchg, int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
-    int deblock_left;
+    int deblock_topleft;
     int deblock_top;
     int top_idx = 1;
     uint8_t *top_border_m1;
@@ -1050,11 +1050,11 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
     }
 
     if(h->deblocking_filter == 2) {
-        deblock_left = h->left_type[0];
-        deblock_top  = h->top_type;
+        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
+        deblock_top     = h->top_type;
     } else {
-        deblock_left = (s->mb_x > 0);
-        deblock_top =  (s->mb_y > !!MB_FIELD);
+        deblock_topleft = (s->mb_x > 0);
+        deblock_top     = (s->mb_y > !!MB_FIELD);
     }
 
     src_y  -=   linesize + 1 + pixel_shift;
@@ -1077,7 +1077,7 @@ if (xchg) AV_SWAP64(b,a);\
 else      AV_COPY64(b,a);
 
     if(deblock_top){
-        if(deblock_left){
+        if(deblock_topleft){
             XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
         }
         XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
@@ -1088,7 +1088,7 @@ else      AV_COPY64(b,a);
     }
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
         if(deblock_top){
-            if(deblock_left){
+            if(deblock_topleft){
                 XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                 XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
             }
@@ -2611,7 +2611,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
         }
     }
     h->slice_type= old_slice_type;
-    s->mb_x= 0;
+    s->mb_x= end_x;
     s->mb_y= end_mb_y - FRAME_MBAFF;
     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);

From 984ece7503597d30e6f3bdeb67e337ea1616f880 Mon Sep 17 00:00:00 2001
From: Vitor Sessak <vitor1001@gmail.com>
Date: Thu, 19 May 2011 21:33:27 +0200
Subject: [PATCH 132/830] qdm2: Use floating point synthesis filter.

This avoids needless conversion from floating point to fixed point and back.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/qdm2.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index f74cfd9258..53ee304a28 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -172,9 +172,9 @@ typedef struct {
 
     /// Synthesis filter
     MPADSPContext mpadsp;
-    DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
+    DECLARE_ALIGNED(32, float, synth_buf)[MPA_MAX_CHANNELS][512*2];
     int synth_buf_offset[MPA_MAX_CHANNELS];
-    DECLARE_ALIGNED(16, int32_t, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT];
+    DECLARE_ALIGNED(32, float, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT];
 
     /// Mixed temporary data used in decoding
     float tone_level[MPA_MAX_CHANNELS][30][64];
@@ -331,11 +331,6 @@ static av_cold void qdm2_init_vlc(void)
     }
 }
 
-
-/* for floating point to fixed point conversion */
-static const float f2i_scale = (float) (1 << (FRAC_BITS - 15));
-
-
 static int qdm2_get_vlc (GetBitContext *gb, VLC *vlc, int flag, int depth)
 {
     int value;
@@ -484,8 +479,8 @@ static void build_sb_samples_from_noise (QDM2Context *q, int sb)
 
     for (ch = 0; ch < q->nb_channels; ch++)
         for (j = 0; j < 64; j++) {
-            q->sb_samples[ch][j * 2][sb] = (int32_t)(f2i_scale * SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j] + .5);
-            q->sb_samples[ch][j * 2 + 1][sb] = (int32_t)(f2i_scale * SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j] + .5);
+            q->sb_samples[ch][j * 2][sb] = SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j];
+            q->sb_samples[ch][j * 2 + 1][sb] = SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j];
         }
 }
 
@@ -925,11 +920,11 @@ static void synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int l
                     for (chs = 0; chs < q->nb_channels; chs++)
                         for (k = 0; k < run; k++)
                             if ((j + k) < 128)
-                                q->sb_samples[chs][j + k][sb] = (int32_t)(f2i_scale * q->tone_level[chs][sb][((j + k)/2)] * tmp[k][chs] + .5);
+                                q->sb_samples[chs][j + k][sb] = q->tone_level[chs][sb][((j + k)/2)] * tmp[k][chs];
                 } else {
                     for (k = 0; k < run; k++)
                         if ((j + k) < 128)
-                            q->sb_samples[ch][j + k][sb] = (int32_t)(f2i_scale * q->tone_level[ch][sb][(j + k)/2] * samples[k] + .5);
+                            q->sb_samples[ch][j + k][sb] = q->tone_level[ch][sb][(j + k)/2] * samples[k];
                 }
 
                 j += run;
@@ -1603,7 +1598,7 @@ static void qdm2_calculate_fft (QDM2Context *q, int channel, int sub_packet)
  */
 static void qdm2_synthesis_filter (QDM2Context *q, int index)
 {
-    OUT_INT samples[MPA_MAX_CHANNELS * MPA_FRAME_SIZE];
+    float samples[MPA_MAX_CHANNELS * MPA_FRAME_SIZE];
     int i, k, ch, sb_used, sub_sampling, dither_state = 0;
 
     /* copy sb_samples */
@@ -1615,12 +1610,12 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
                 q->sb_samples[ch][(8 * index) + i][k] = 0;
 
     for (ch = 0; ch < q->nb_channels; ch++) {
-        OUT_INT *samples_ptr = samples + ch;
+        float *samples_ptr = samples + ch;
 
         for (i = 0; i < 8; i++) {
-            ff_mpa_synth_filter_fixed(&q->mpadsp,
+            ff_mpa_synth_filter_float(&q->mpadsp,
                 q->synth_buf[ch], &(q->synth_buf_offset[ch]),
-                ff_mpa_synth_window_fixed, &dither_state,
+                ff_mpa_synth_window_float, &dither_state,
                 samples_ptr, q->nb_channels,
                 q->sb_samples[ch][(8 * index) + i]);
             samples_ptr += 32 * q->nb_channels;
@@ -1632,7 +1627,7 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
 
     for (ch = 0; ch < q->channels; ch++)
         for (i = 0; i < q->frame_size; i++)
-            q->output_buffer[q->channels * i + ch] += (float)(samples[q->nb_channels * sub_sampling * i + ch] >> (sizeof(OUT_INT)*8-16));
+            q->output_buffer[q->channels * i + ch] += (1 << 23) * samples[q->nb_channels * sub_sampling * i + ch];
 }
 
 
@@ -1649,7 +1644,7 @@ static av_cold void qdm2_init(QDM2Context *q) {
     initialized = 1;
 
     qdm2_init_vlc();
-    ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed);
+    ff_mpa_synth_init_float(ff_mpa_synth_window_float);
     softclip_table_init();
     rnd_table_init();
     init_noise_samples();

From ed96fffb6e0c744a405f379ba4ff4faf9e79173d Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 20 May 2011 09:33:57 +0200
Subject: [PATCH 133/830] libmp3lame: include "libavutil/intreadwrite.h" header

Fix compilation.
---
 libavcodec/libmp3lame.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c
index 5cfb122903..05893830c1 100644
--- a/libavcodec/libmp3lame.c
+++ b/libavcodec/libmp3lame.c
@@ -24,6 +24,7 @@
  * Interface to libmp3lame for mp3 encoding.
  */
 
+#include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "mpegaudio.h"
 #include <lame/lame.h>

From 328810390d0458b7c8200342a87f238f7610b776 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 19:01:38 +0200
Subject: [PATCH 134/830] ffmpeg: remove useless NULL-check on
 avfilter_unref_buffer

The check is no longer required: since recent changes to
avfilter_unref_buffer(), the check is done inside the function itself.
Simplify.
---
 ffmpeg.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 2b19c7f21b..fb644adc18 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1785,8 +1785,7 @@ static int output_packet(AVInputStream *ist, int ist_index,
                     cont:
                     frame_available = (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) &&
                                        ost->output_video_filter && avfilter_poll_frame(ost->output_video_filter->inputs[0]);
-                    if (ost->picref)
-                        avfilter_unref_buffer(ost->picref);
+                    avfilter_unref_buffer(ost->picref);
                 }
 #endif
                 }

From 0d65e0f8cb0f924be95650f50f3d05d0b223aceb Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 20 May 2011 00:58:00 +0200
Subject: [PATCH 135/830] cmdutils: reset *picref_ptr to NULL in
 get_filtered_frame()

Avoid leaving an invalid pointer in *picref_ptr, and fix a crash in case
of get_filtered_video_frame() failure.
---
 cmdutils.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cmdutils.c b/cmdutils.c
index a5363b8176..71d6d33454 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -908,6 +908,7 @@ int get_filtered_video_frame(AVFilterContext *ctx, AVFrame *frame,
 {
     int ret;
     AVFilterBufferRef *picref;
+    *picref_ptr = NULL;
 
     if ((ret = avfilter_request_frame(ctx->inputs[0])) < 0)
         return ret;

From a469c32478cd2bee1fabc37b388d0fcda28ecc07 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 17 May 2011 20:22:48 +0200
Subject: [PATCH 136/830] indeo3: add @file doxy and a link to multimedia wiki
 documentation

---
 libavcodec/indeo3.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libavcodec/indeo3.c b/libavcodec/indeo3.c
index 588a5b4cac..2be17ff517 100644
--- a/libavcodec/indeo3.c
+++ b/libavcodec/indeo3.c
@@ -1,7 +1,4 @@
 /*
- * Intel Indeo 3 (IV31, IV32, etc.) video decoder for ffmpeg
- * written, produced, and directed by Alan Smithee
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -19,6 +16,15 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+/**
+ * @file
+ * Intel Indeo 3 (IV31, IV32, etc.) video decoder for FFmpeg
+ * written, produced, and directed by Alan Smithee
+ *
+ * For some documentation see:
+ * http://wiki.multimedia.cx/index.php?title=Indeo_3
+ */
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

From 5443812fbcc7c12f783c1328539232cb3bcde9e1 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 17 May 2011 20:25:06 +0200
Subject: [PATCH 137/830] indeo3: remove unnecessary includes

---
 libavcodec/indeo3.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/libavcodec/indeo3.c b/libavcodec/indeo3.c
index 2be17ff517..7a648cd3f3 100644
--- a/libavcodec/indeo3.c
+++ b/libavcodec/indeo3.c
@@ -25,10 +25,6 @@
  * http://wiki.multimedia.cx/index.php?title=Indeo_3
  */
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
 #include "libavutil/imgutils.h"
 #include "avcodec.h"
 #include "dsputil.h"

From 7f58eb3c2b552f232905731b5944307e72c590a0 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 17 May 2011 13:01:21 +0200
Subject: [PATCH 138/830] indeo3: release buffer in indeo3_decode_end()

Fix leak.
---
 libavcodec/indeo3.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/indeo3.c b/libavcodec/indeo3.c
index 7a648cd3f3..b74fcf7c22 100644
--- a/libavcodec/indeo3.c
+++ b/libavcodec/indeo3.c
@@ -1137,6 +1137,9 @@ static av_cold int indeo3_decode_end(AVCodecContext *avctx)
 
     iv_free_func(s);
 
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
     return 0;
 }
 

From 1e31ea71ffa27928d27b934945cc97d9e92ac471 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 21:14:09 +0200
Subject: [PATCH 139/830] ffprobe.texi: remove inclusion of muxers section

Muxers are not used by ffprobe.
---
 doc/ffprobe.texi | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi
index 15e7764a1e..514d19e20f 100644
--- a/doc/ffprobe.texi
+++ b/doc/ffprobe.texi
@@ -112,7 +112,6 @@ with name "STREAM".
 @c man end
 
 @include demuxers.texi
-@include muxers.texi
 @include protocols.texi
 @include indevs.texi
 

From d4111e525dbb0efe72c03d715134d654f3d66392 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 19:41:19 +0200
Subject: [PATCH 140/830] encoders.texi: decrease level for audio encoders
 section

Make Audio Encoders a separate chapter, and decrease the level of the
sections it contains by one.

Improve rendering, and improve consistency with the other parts of the
manual.
---
 doc/encoders.texi | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 2855d89f7a..a7193a6258 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -17,11 +17,15 @@ with the options @code{--enable-encoder=@var{ENCODER}} /
 The option @code{-codecs} of the ff* tools will display the list of
 enabled encoders.
 
-A description of some of the currently available encoders follows.
+@c man end ENCODERS
 
-@section Audio Encoders
+@chapter Audio Encoders
+@c man begin AUDIO ENCODERS
 
-@subsection ac3 and ac3_fixed
+A description of some of the currently available audio encoders
+follows.
+
+@section ac3 and ac3_fixed
 
 AC-3 audio encoders.
 
@@ -36,7 +40,7 @@ quality audio for a given bitrate. The @var{ac3_fixed} encoder is not the
 default codec for any of the output formats, so it must be specified explicitly
 using the option @code{-acodec ac3_fixed} in order to use it.
 
-@subheading AC-3 Metadata
+@subsection AC-3 Metadata
 
 The AC-3 metadata options are used to set parameters that describe the audio,
 but in most cases do not affect the audio encoding itself. Some of the options
@@ -55,7 +59,7 @@ documents.
 @item @uref{http://www.dolby.com/uploadedFiles/zz-_Shared_Assets/English_PDFs/Professional/46_DDEncodingGuidelines.pdf,Dolby Digital Professional Encoding Guidelines}
 @end itemize
 
-@subsubheading Metadata Control Options
+@subsubsection Metadata Control Options
 
 @table @option
 
@@ -72,7 +76,7 @@ Metadata values can be changed before encoding each frame.
 
 @end table
 
-@subsubheading Downmix Levels
+@subsubsection Downmix Levels
 
 @table @option
 
@@ -106,7 +110,7 @@ Silence Surround Channel(s)
 
 @end table
 
-@subsubheading Audio Production Information
+@subsubsection Audio Production Information
 Audio Production Information is optional information describing the mixing
 environment.  Either none or both of the fields are written to the bitstream.
 
@@ -140,7 +144,7 @@ Small Room
 
 @end table
 
-@subsubheading Other Metadata Options
+@subsubsection Other Metadata Options
 
 @table @option
 
@@ -195,7 +199,7 @@ Original Source (default)
 
 @end table
 
-@subsubheading Extended Bitstream Information
+@subsection Extended Bitstream Information
 The extended bitstream options are part of the Alternate Bit Stream Syntax as
 specified in Annex D of the A/52:2010 standard. It is grouped into 2 parts.
 If any one parameter in a group is specified, all values in that group will be
@@ -205,7 +209,7 @@ will use these values instead of the ones specified in the @code{center_mixlev}
 and @code{surround_mixlev} options if it supports the Alternate Bit Stream
 Syntax.
 
-@subsubheading Extended Bitstream Information - Part 1
+@subsubsection Extended Bitstream Information - Part 1
 
 @table @option
 
@@ -302,7 +306,7 @@ Silence Surround Channel(s)
 
 @end table
 
-@subsubheading Extended Bitstream Information - Part 2
+@subsubsection Extended Bitstream Information - Part 2
 
 @table @option
 
@@ -353,7 +357,7 @@ HDCD A/D Converter
 
 @end table
 
-@subheading Other AC-3 Encoding Options
+@subsection Other AC-3 Encoding Options
 
 @table @option
 
@@ -365,4 +369,4 @@ is highly recommended that it be left as enabled except for testing purposes.
 
 @end table
 
-@c man end ENCODERS
+@c man end AUDIO ENCODERS

From 5e45f03420945dfeaf66729f5d022e5c15ff5723 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 20:34:56 +0200
Subject: [PATCH 141/830] doc: add decoders.texi file

---
 doc/decoders.texi | 22 ++++++++++++++++++++++
 doc/ffmpeg.texi   |  1 +
 doc/ffplay.texi   |  1 +
 doc/ffprobe.texi  |  1 +
 4 files changed, 25 insertions(+)
 create mode 100644 doc/decoders.texi

diff --git a/doc/decoders.texi b/doc/decoders.texi
new file mode 100644
index 0000000000..359ab39bd9
--- /dev/null
+++ b/doc/decoders.texi
@@ -0,0 +1,22 @@
+@chapter Decoders
+@c man begin DECODERS
+
+Decoders are configured elements in FFmpeg which allow the decoding of
+multimedia streams.
+
+When you configure your FFmpeg build, all the supported native decoders
+are enabled by default. Decoders requiring an external library must be enabled
+manually via the corresponding @code{--enable-lib} option. You can list all
+available decoders using the configure option @code{--list-decoders}.
+
+You can disable all the decoders with the configure option
+@code{--disable-decoders} and selectively enable / disable single decoders
+with the options @code{--enable-decoder=@var{DECODER}} /
+@code{--disable-decoder=@var{DECODER}}.
+
+The option @code{-codecs} of the ff* tools will display the list of
+enabled decoders.
+
+A description of some of the currently available decoders follows.
+
+@c man end DECODERS
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 7f2230d46f..0cf1a829b2 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -1052,6 +1052,7 @@ file to which you want to add them.
 @c man end EXAMPLES
 
 @include eval.texi
+@include decoders.texi
 @include encoders.texi
 @include demuxers.texi
 @include muxers.texi
diff --git a/doc/ffplay.texi b/doc/ffplay.texi
index 9e842911e0..7b99666f1f 100644
--- a/doc/ffplay.texi
+++ b/doc/ffplay.texi
@@ -174,6 +174,7 @@ Seek to percentage in file corresponding to fraction of width.
 @c man end
 
 @include eval.texi
+@include decoders.texi
 @include demuxers.texi
 @include muxers.texi
 @include indevs.texi
diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi
index 514d19e20f..ee4ffb9671 100644
--- a/doc/ffprobe.texi
+++ b/doc/ffprobe.texi
@@ -111,6 +111,7 @@ with name "STREAM".
 @end table
 @c man end
 
+@include decoders.texi
 @include demuxers.texi
 @include protocols.texi
 @include indevs.texi

From c5385147cd74bc1892650c47531c7797f22b0b1d Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 20:51:28 +0200
Subject: [PATCH 142/830] decoders.texi: add documentation for rawvideo decoder
 and options

---
 doc/decoders.texi | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/doc/decoders.texi b/doc/decoders.texi
index 359ab39bd9..597b9e6d41 100644
--- a/doc/decoders.texi
+++ b/doc/decoders.texi
@@ -17,6 +17,34 @@ with the options @code{--enable-decoder=@var{DECODER}} /
 The option @code{-codecs} of the ff* tools will display the list of
 enabled decoders.
 
-A description of some of the currently available decoders follows.
-
 @c man end DECODERS
+
+@chapter Video Decoders
+@c man begin VIDEO DECODERS
+
+A description of some of the currently available video decoders
+follows.
+
+@section rawvideo
+
+Rawvideo decoder.
+
+This decoder allow to decoder rawvideo stream.
+
+@subsection Options
+
+@table @option
+@item top @var{top_field_first}
+Specify the assumed field type of the input video.
+@table @option
+@item -1
+the video is assumed to be progressive (default)
+@item 0
+bottom-field-first is assumed
+@item 1
+top-field-first is assumed
+@end table
+
+@end table
+
+@c man end VIDEO DECODERS

From 52da548c7dabac364cc38e4239edc56a28d0d63c Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 21:14:37 +0200
Subject: [PATCH 143/830] encoders.texi: add documentation for the libx264
 encoder

Also remove -x264opts item from the ffmpeg manual, since it belongs to
the encoders section.
---
 doc/encoders.texi | 63 +++++++++++++++++++++++++++++++++++++++++++++++
 doc/ffmpeg.texi   |  7 ------
 2 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index a7193a6258..9f9e9a9284 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -370,3 +370,66 @@ is highly recommended that it be left as enabled except for testing purposes.
 @end table
 
 @c man end AUDIO ENCODERS
+
+@chapter Video Encoders
+@c man begin VIDEO ENCODERS
+
+A description of some of the currently available video encoders
+follows.
+
+@section libx264
+
+H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 format supported through
+libx264.
+
+Requires the presence of the libx64 headers and library during
+configuration. You need to explicitely configure the build with
+@code{--enable-libx264}.
+
+@subsection Options
+
+@table @option
+
+@item preset @var{preset_name}
+Set the encoding preset.
+
+@item tune @var{tune_name}
+Tune the encoding params.
+Deprecated in favor of @var{x264_opts}
+
+@item fastfirstpass @var{bool}
+Use fast settings when encoding first pass, default value is 1.
+Deprecated in favor of @var{x264_opts}.
+
+@item profile @var{profile_name}
+Set profile restrictions.
+Deprecated in favor of @var{x264_opts}.
+
+@item level @var{level}
+Specify level (as defined by Annex A).
+Deprecated in favor of @var{x264_opts}.
+
+@item passlogfile @var{filename}
+Specify filename for 2 pass stats.
+Deprecated in favor of @var{x264_opts}.
+
+@item wpredp @var{wpred_type}
+Specify Weighted prediction for P-frames.
+Deprecated in favor of @var{x264_opts}.
+
+@item x264opts @var{options}
+Allow to set any x264 option, see x264 manual for a list.
+
+@var{options} is a list of @var{key}=@var{value} couples separated by
+":".
+@end table
+
+For example to specify libx264 encoding options with @file{ffmpeg}:
+@example
+ffmpeg -i foo.mpg -vcodec libx264 -x264opts keyint=123:min-keyint=20 -an out.mkv
+@end example
+
+For more information about libx264 and the supported options see:
+@url{http://www.videolan.org/developers/x264.html}
+
+@c man end VIDEO ENCODERS
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 0cf1a829b2..77fde18564 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -312,13 +312,6 @@ the input video.
 Use the option "-filters" to show all the available filters (including
 also sources and sinks).
 
-@item -x264opts @var{option}
-Allows you to set any x264 option, see x264 manual for a list.
-@example
-ffmpeg -i foo.mpg -vcodec libx264 -x264opts keyint=123:min-keyint=20 -an out.mkv
-@end example
-
-
 @end table
 
 @section Advanced Video Options

From 950b3589974e55888fc37369f2d9216a18c1ca84 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 21:41:50 +0200
Subject: [PATCH 144/830] cmdutils: use const AVClass * when senseful
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix warnings:
cmdutils.c: In function ‘opt_default’:
cmdutils.c:304: warning: initialization discards qualifiers from pointer target type
cmdutils.c: In function ‘set_context_opts’:
cmdutils.c:431: warning: passing argument 2 of ‘alloc_priv_context’ discards qualifiers from pointer target type
cmdutils.c:414: note: expected ‘struct AVClass *’ but argument is of type ‘const struct AVClass *’
---
 cmdutils.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/cmdutils.c b/cmdutils.c
index 71d6d33454..5a0c8f378f 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -301,7 +301,7 @@ int opt_default(const char *opt, const char *arg){
     AVInputFormat *iformat = NULL;
 
     while ((p = av_codec_next(p))) {
-        AVClass *c = p->priv_class;
+        const AVClass *c = p->priv_class;
         if (c && av_find_opt(&c, opt, NULL, 0, 0))
             break;
     }
@@ -411,10 +411,11 @@ int opt_timelimit(const char *opt, const char *arg)
     return 0;
 }
 
-static void *alloc_priv_context(int size, AVClass *class){
+static void *alloc_priv_context(int size, const AVClass *class)
+{
     void *p = av_mallocz(size);
     if (p) {
-        *(AVClass**)p = class;
+        *(const AVClass **)p = class;
         av_opt_set_defaults(p);
     }
     return p;

From 125d807660996b879c4762c3a15c4bb485e13f4c Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 20 May 2011 10:12:42 +0200
Subject: [PATCH 145/830] decoders.texi: fix typos in rawvideo section

---
 doc/decoders.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/decoders.texi b/doc/decoders.texi
index 597b9e6d41..18ac2fa8db 100644
--- a/doc/decoders.texi
+++ b/doc/decoders.texi
@@ -29,7 +29,7 @@ follows.
 
 Rawvideo decoder.
 
-This decoder allow to decoder rawvideo stream.
+This decoder decodes rawvideo streams.
 
 @subsection Options
 

From f1b3f33d48b366dd7b3b915b4de90d9ad4c4ae39 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 20 May 2011 11:36:16 +0200
Subject: [PATCH 146/830] vf_libopencv: prefer opencv/cxcore.h over cxtypes.h

Require the presence of opencv/cxcore.h in place of opencv/cxtypes.h,
which has been removed. Fix compilation with libopencv > 2.1.0.

Fix trac issue #221.
---
 configure                  | 2 +-
 libavfilter/vf_libopencv.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 6445636c6d..de321ba018 100755
--- a/configure
+++ b/configure
@@ -2919,7 +2919,7 @@ enabled libmp3lame && require  "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_q
 enabled libnut     && require  libnut libnut.h nut_demuxer_init -lnut
 enabled libopencore_amrnb  && require libopencore_amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb
 enabled libopencore_amrwb  && require libopencore_amrwb opencore-amrwb/dec_if.h D_IF_init -lopencore-amrwb
-enabled libopencv  && require_pkg_config opencv opencv/cv.h cvCreateImageHeader
+enabled libopencv  && require_pkg_config opencv opencv/cxcore.h cvCreateImageHeader
 enabled libopenjpeg && require libopenjpeg openjpeg.h opj_version -lopenjpeg
 enabled librtmp    && require_pkg_config librtmp librtmp/rtmp.h RTMP_Socket
 enabled libschroedinger && require_pkg_config schroedinger-1.0 schroedinger/schro.h schro_init
diff --git a/libavfilter/vf_libopencv.c b/libavfilter/vf_libopencv.c
index 55e0a7f757..b789c8e19c 100644
--- a/libavfilter/vf_libopencv.c
+++ b/libavfilter/vf_libopencv.c
@@ -26,7 +26,7 @@
 /* #define DEBUG */
 
 #include <opencv/cv.h>
-#include <opencv/cxtypes.h>
+#include <opencv/cxcore.h>
 #include "libavutil/avstring.h"
 #include "libavutil/file.h"
 #include "avfilter.h"

From 0199e00bc80a55aacf7ecd393bf32dcd64e06739 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 13:44:11 +0100
Subject: [PATCH 147/830] mpegaudio: move all header parsing to
 mpegaudiodecheader.[ch]

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mp3_header_compress_bsf.c   |  2 +-
 libavcodec/mp3_header_decompress_bsf.c |  2 +-
 libavcodec/mpegaudio.h                 | 38 +-----------------------
 libavcodec/mpegaudio_parser.c          | 39 ------------------------
 libavcodec/mpegaudiodecheader.c        | 37 +++++++++++++++++++++++
 libavcodec/mpegaudiodecheader.h        | 41 ++++++++++++++++++++++++--
 6 files changed, 79 insertions(+), 80 deletions(-)

diff --git a/libavcodec/mp3_header_compress_bsf.c b/libavcodec/mp3_header_compress_bsf.c
index c880e5e53d..bc3659ef3e 100644
--- a/libavcodec/mp3_header_compress_bsf.c
+++ b/libavcodec/mp3_header_compress_bsf.c
@@ -20,7 +20,7 @@
 
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
-#include "mpegaudio.h"
+#include "mpegaudiodecheader.h"
 
 
 static int mp3_header_compress(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args,
diff --git a/libavcodec/mp3_header_decompress_bsf.c b/libavcodec/mp3_header_decompress_bsf.c
index b4b4167620..3f3074286a 100644
--- a/libavcodec/mp3_header_decompress_bsf.c
+++ b/libavcodec/mp3_header_decompress_bsf.c
@@ -20,7 +20,7 @@
 
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
-#include "mpegaudio.h"
+#include "mpegaudiodecheader.h"
 #include "mpegaudiodata.h"
 
 
diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index a46ecc5a9f..b55680100b 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -30,7 +30,7 @@
 #   define CONFIG_FLOAT 0
 #endif
 
-#include "avcodec.h"
+#include <stdint.h>
 
 /* max frame size, in samples */
 #define MPA_FRAME_SIZE 1152
@@ -47,8 +47,6 @@
 #define MPA_DUAL    2
 #define MPA_MONO    3
 
-#define MP3_MASK 0xFFFE0CCF
-
 #ifndef FRAC_BITS
 #define FRAC_BITS   23   /* fractional bits for sb_samples and dct */
 #define WFRAC_BITS  16   /* fractional bits for window */
@@ -72,40 +70,6 @@ typedef int32_t MPA_INT;
 typedef int16_t OUT_INT;
 #endif
 
-#define MPA_DECODE_HEADER \
-    int frame_size; \
-    int error_protection; \
-    int layer; \
-    int sample_rate; \
-    int sample_rate_index; /* between 0 and 8 */ \
-    int bit_rate; \
-    int nb_channels; \
-    int mode; \
-    int mode_ext; \
-    int lsf;
-
-typedef struct MPADecodeHeader {
-  MPA_DECODE_HEADER
-} MPADecodeHeader;
-
 int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
-int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
-
-/* fast header check for resync */
-static inline int ff_mpa_check_header(uint32_t header){
-    /* header */
-    if ((header & 0xffe00000) != 0xffe00000)
-        return -1;
-    /* layer check */
-    if ((header & (3<<17)) == 0)
-        return -1;
-    /* bit rate */
-    if ((header & (0xf<<12)) == 0xf<<12)
-        return -1;
-    /* frequency */
-    if ((header & (3<<10)) == 3<<10)
-        return -1;
-    return 0;
-}
 
 #endif /* AVCODEC_MPEGAUDIO_H */
diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c
index 7cfd107d53..06d46f2152 100644
--- a/libavcodec/mpegaudio_parser.c
+++ b/libavcodec/mpegaudio_parser.c
@@ -38,45 +38,6 @@ typedef struct MpegAudioParseContext {
 #define SAME_HEADER_MASK \
    (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19))
 
-/* useful helper to get mpeg audio stream infos. Return -1 if error in
-   header, otherwise the coded frame size in bytes */
-int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate)
-{
-    MPADecodeHeader s1, *s = &s1;
-
-    if (ff_mpa_check_header(head) != 0)
-        return -1;
-
-    if (ff_mpegaudio_decode_header(s, head) != 0) {
-        return -1;
-    }
-
-    switch(s->layer) {
-    case 1:
-        avctx->codec_id = CODEC_ID_MP1;
-        *frame_size = 384;
-        break;
-    case 2:
-        avctx->codec_id = CODEC_ID_MP2;
-        *frame_size = 1152;
-        break;
-    default:
-    case 3:
-        avctx->codec_id = CODEC_ID_MP3;
-        if (s->lsf)
-            *frame_size = 576;
-        else
-            *frame_size = 1152;
-        break;
-    }
-
-    *sample_rate = s->sample_rate;
-    *channels = s->nb_channels;
-    *bit_rate = s->bit_rate;
-    avctx->sub_id = s->layer;
-    return s->frame_size;
-}
-
 static int mpegaudio_parse(AVCodecParserContext *s1,
                            AVCodecContext *avctx,
                            const uint8_t **poutbuf, int *poutbuf_size,
diff --git a/libavcodec/mpegaudiodecheader.c b/libavcodec/mpegaudiodecheader.c
index a0bd4fcee6..be7abc619d 100644
--- a/libavcodec/mpegaudiodecheader.c
+++ b/libavcodec/mpegaudiodecheader.c
@@ -108,3 +108,40 @@ int ff_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header)
 #endif
     return 0;
 }
+
+int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate)
+{
+    MPADecodeHeader s1, *s = &s1;
+
+    if (ff_mpa_check_header(head) != 0)
+        return -1;
+
+    if (ff_mpegaudio_decode_header(s, head) != 0) {
+        return -1;
+    }
+
+    switch(s->layer) {
+    case 1:
+        avctx->codec_id = CODEC_ID_MP1;
+        *frame_size = 384;
+        break;
+    case 2:
+        avctx->codec_id = CODEC_ID_MP2;
+        *frame_size = 1152;
+        break;
+    default:
+    case 3:
+        avctx->codec_id = CODEC_ID_MP3;
+        if (s->lsf)
+            *frame_size = 576;
+        else
+            *frame_size = 1152;
+        break;
+    }
+
+    *sample_rate = s->sample_rate;
+    *channels = s->nb_channels;
+    *bit_rate = s->bit_rate;
+    avctx->sub_id = s->layer;
+    return s->frame_size;
+}
diff --git a/libavcodec/mpegaudiodecheader.h b/libavcodec/mpegaudiodecheader.h
index 41a491b986..2991595b02 100644
--- a/libavcodec/mpegaudiodecheader.h
+++ b/libavcodec/mpegaudiodecheader.h
@@ -27,13 +27,50 @@
 #ifndef AVCODEC_MPEGAUDIODECHEADER_H
 #define AVCODEC_MPEGAUDIODECHEADER_H
 
-#include "libavutil/common.h"
-#include "mpegaudio.h"
+#include "avcodec.h"
 
+#define MP3_MASK 0xFFFE0CCF
+
+#define MPA_DECODE_HEADER \
+    int frame_size; \
+    int error_protection; \
+    int layer; \
+    int sample_rate; \
+    int sample_rate_index; /* between 0 and 8 */ \
+    int bit_rate; \
+    int nb_channels; \
+    int mode; \
+    int mode_ext; \
+    int lsf;
+
+typedef struct MPADecodeHeader {
+  MPA_DECODE_HEADER
+} MPADecodeHeader;
 
 /* header decoding. MUST check the header before because no
    consistency check is done there. Return 1 if free format found and
    that the frame size must be computed externally */
 int ff_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header);
 
+/* useful helper to get mpeg audio stream infos. Return -1 if error in
+   header, otherwise the coded frame size in bytes */
+int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
+
+/* fast header check for resync */
+static inline int ff_mpa_check_header(uint32_t header){
+    /* header */
+    if ((header & 0xffe00000) != 0xffe00000)
+        return -1;
+    /* layer check */
+    if ((header & (3<<17)) == 0)
+        return -1;
+    /* bit rate */
+    if ((header & (0xf<<12)) == 0xf<<12)
+        return -1;
+    /* frequency */
+    if ((header & (3<<10)) == 3<<10)
+        return -1;
+    return 0;
+}
+
 #endif /* AVCODEC_MPEGAUDIODECHEADER_H */

From f255a28d140a64ea4c1a5060061863aec993b5ea Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 13:27:24 +0100
Subject: [PATCH 148/830] mpegaudio: clean up #includes

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudio_parser.c | 1 -
 libavcodec/mpegaudiodata.h    | 2 +-
 libavcodec/mpegaudiodec.c     | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c
index 06d46f2152..f07d34bd29 100644
--- a/libavcodec/mpegaudio_parser.c
+++ b/libavcodec/mpegaudio_parser.c
@@ -21,7 +21,6 @@
  */
 
 #include "parser.h"
-#include "mpegaudio.h"
 #include "mpegaudiodecheader.h"
 
 
diff --git a/libavcodec/mpegaudiodata.h b/libavcodec/mpegaudiodata.h
index 841ec516ba..84458836fa 100644
--- a/libavcodec/mpegaudiodata.h
+++ b/libavcodec/mpegaudiodata.h
@@ -27,7 +27,7 @@
 #ifndef AVCODEC_MPEGAUDIODATA_H
 #define AVCODEC_MPEGAUDIODATA_H
 
-#include "libavutil/common.h"
+#include <stdint.h>
 
 #define MODE_EXT_MS_STEREO 2
 #define MODE_EXT_I_STEREO  1
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index f0d9958d2b..960d13d1e8 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -27,7 +27,6 @@
 #include "libavutil/audioconvert.h"
 #include "avcodec.h"
 #include "get_bits.h"
-#include "dsputil.h"
 #include "mathops.h"
 #include "mpegaudiodsp.h"
 

From 50fefa10de920e16036f2be977c39fc0c286d024 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Fri, 20 May 2011 11:33:57 +0300
Subject: [PATCH 149/830] mpegtsenc: Add an AVClass pointer to the private data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since a private class is set for this muxer, the callers will
assume that the private data starts with an AVClass pointer.
If no such member exists, the first few bytes of the struct
will be overwritten, and the class pointer may be broken at
any later time.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/mpegtsenc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index 2aa9698651..393b779168 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c
@@ -53,6 +53,7 @@ typedef struct MpegTSService {
 } MpegTSService;
 
 typedef struct MpegTSWrite {
+    const AVClass *av_class;
     MpegTSSection pat; /* MPEG2 pat table */
     MpegTSSection sdt; /* MPEG2 sdt table context */
     MpegTSService **services;

From eb8da636af0652a4865055c237a76caff02b98bf Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 20 May 2011 12:06:51 +0100
Subject: [PATCH 150/830] fate: allow overriding default build and install dirs

This is useful e.g. for building in a different filesystem than
where the source is kept.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/fate.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/fate.sh b/tests/fate.sh
index 6f0e0fffcb..c522269b54 100755
--- a/tests/fate.sh
+++ b/tests/fate.sh
@@ -91,8 +91,8 @@ lock ${workdir}     || die "${workdir} locked"
 cd ${workdir}       || die "cd ${workdir} failed"
 
 src=${workdir}/src
-build=${workdir}/build
-inst=${workdir}/install
+: ${build:=${workdir}/build}
+: ${inst:=${workdir}/install}
 
 test -d "$src" && update || checkout || die "Error fetching source"
 

From 5ffccc00567363c8e09d4c47dd03dee99c312b7d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 20 May 2011 12:47:42 +0100
Subject: [PATCH 151/830] fate: add comment field

This adds a comment field to the report header, suitable for
extra information not covered by the automatic fields.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/fate.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/fate.sh b/tests/fate.sh
index c522269b54..9fd117c3bc 100755
--- a/tests/fate.sh
+++ b/tests/fate.sh
@@ -75,7 +75,7 @@ clean(){
 
 report(){
     date=$(date -u +%Y%m%d%H%M%S)
-    echo "fate:0:${date}:${slot}:${version}:$1:$2" >report
+    echo "fate:0:${date}:${slot}:${version}:$1:$2:${comment}" >report
     cat ${build}/config.fate ${build}/tests/data/fate/*.rep >>report
     test -n "$fate_recv" && $tar report *.log | gzip | $fate_recv
 }

From 6da57043eabdcde99e8411da3628f2249e951e66 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 20 May 2011 12:57:12 +0100
Subject: [PATCH 152/830] fate: disable threading for encoding

This explicitly disables threading for encoding as slices are otherwise
automatically activated.  This should be dropped once option resetting
between files is fully implemented.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/regression-funcs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh
index b79c258e77..933aa648d6 100755
--- a/tests/regression-funcs.sh
+++ b/tests/regression-funcs.sh
@@ -53,7 +53,7 @@ echov(){
 FFMPEG_OPTS="-v 0 -y"
 COMMON_OPTS="-flags +bitexact -idct simple -sws_flags +accurate_rnd+bitexact"
 DEC_OPTS="$COMMON_OPTS -threads $threads"
-ENC_OPTS="$COMMON_OPTS -dct fastint"
+ENC_OPTS="$COMMON_OPTS -threads 1 -dct fastint"
 
 run_ffmpeg()
 {

From d69f9a4234fefcbf038e6a19203df6865f38ffb8 Mon Sep 17 00:00:00 2001
From: Dave Yeo <dave.r.yeo@gmail.com>
Date: Fri, 20 May 2011 17:33:38 +0200
Subject: [PATCH 153/830] Add support for a.out object format to assembler
 macros.

This format is still used by e.g. OS/2.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/x86/dsputil_yasm.asm |  2 +-
 libavcodec/x86/fft_mmx.asm      |  2 +-
 libavcodec/x86/fmtconvert.asm   |  2 +-
 libavcodec/x86/x86inc.asm       | 11 +++++++++++
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 15f626e8cf..8b19cc1441 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -30,7 +30,7 @@ pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13
 pb_revwords: db 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1
 pd_16384: times 4 dd 16384
 
-section .text align=16
+SECTION_TEXT
 
 %macro SCALARPRODUCT 1
 ; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order, int shift)
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index fc5cb98926..bd2e8297e7 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -85,7 +85,7 @@ cextern cos_ %+ i
     %1
 %endmacro
 
-section .text align=16
+SECTION_TEXT
 
 %macro T2_3DN 4 ; z0, z1, mem0, mem1
     mova     %1, %3
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index e023b48322..13d6cc0130 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -22,7 +22,7 @@
 %include "x86inc.asm"
 %include "x86util.asm"
 
-section .text align=16
+SECTION_TEXT
 
 %macro PSWAPD_SSE 2
     pshufw %1, %2, 0x4e
diff --git a/libavcodec/x86/x86inc.asm b/libavcodec/x86/x86inc.asm
index 53091c14c9..c84d5566a2 100644
--- a/libavcodec/x86/x86inc.asm
+++ b/libavcodec/x86/x86inc.asm
@@ -63,11 +63,22 @@
     %elifidn __OUTPUT_FORMAT__,macho
         SECTION .text align=%1
         fakegot:
+    %elifidn __OUTPUT_FORMAT__,aout
+        section .text
     %else
         SECTION .rodata align=%1
     %endif
 %endmacro
 
+; Enter the .text section with align=%1 (default 16); aout does not support align=, so it is left out there
+%macro SECTION_TEXT 0-1 16
+    %ifidn __OUTPUT_FORMAT__,aout
+        SECTION .text
+    %else
+        SECTION .text align=%1
+    %endif
+%endmacro
+
 %ifdef WIN64
     %define PIC
 %elifndef ARCH_X86_64

From 9297f1ed15a4707b1b59c5c347d7185ee0bb3ef8 Mon Sep 17 00:00:00 2001
From: Dave Yeo <dave.r.yeo@gmail.com>
Date: Fri, 20 May 2011 17:26:52 +0200
Subject: [PATCH 154/830] configure: Set OS/2 objformat to a.out.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 configure | 1 +
 1 file changed, 1 insertion(+)
 mode change 100755 => 100644 configure

diff --git a/configure b/configure
old mode 100755
new mode 100644
index 96867a550e..634edd54d3
--- a/configure
+++ b/configure
@@ -2473,6 +2473,7 @@ case $target_os in
         ;;
     os/2*)
         ln_s="cp -f"
+        objformat="aout"
         add_ldflags -Zomf -Zbin-files -Zargs-wild -Zmap
         SHFLAGS='$(SUBDIR)$(NAME).def -Zdll -Zomf'
         FFSERVERLDFLAGS=""

From 0a6db2a25a70e3e8fb414e361719152a9e7e9766 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 16 May 2011 16:16:14 +0200
Subject: [PATCH 155/830] configure: Do not unconditionally add -Wall to host
 CFLAGS.

Some compilers choke on -Wall, so only add the flag after checking it works.
---
 configure | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 634edd54d3..b63330cbff 100644
--- a/configure
+++ b/configure
@@ -1672,7 +1672,7 @@ LIB_INSTALL_EXTRA_CMD='$$(RANLIB) "$(LIBDIR)/$(LIBNAME)"'
 
 CC_O='-o $@'
 
-host_cflags='-D_ISOC99_SOURCE -O3 -g -Wall'
+host_cflags='-D_ISOC99_SOURCE -O3 -g'
 host_libs='-lm'
 
 target_path='$(CURDIR)'
@@ -2334,6 +2334,7 @@ check_cc -D_LARGEFILE_SOURCE <<EOF && add_cppflags -D_LARGEFILE_SOURCE
 EOF
 
 check_host_cflags -std=c99
+check_host_cflags -Wall
 
 case "$arch" in
     alpha|ia64|mips|parisc|sparc)

From 42da8ea8e848e65611f0c606cd2c96d3a70c041d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 20 May 2011 10:49:20 -0400
Subject: [PATCH 156/830] LATM/AAC: Free previously initialized context on
 reinit.

Fixes memory leaks which are the result of overwriting already-initialized
MDCT contexts during context reinitialization, e.g. in valgrind
fate-aac-latm_000000001180bc60.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/aacdec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index f2d50f4aba..fbb3582661 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -2464,6 +2464,7 @@ static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size,
             *out_size = 0;
             return avpkt->size;
         } else {
+            aac_decode_close(avctx);
             if ((err = aac_decode_init(avctx)) < 0)
                 return err;
             latmctx->initialized = 1;

From a1d0dcc7134f1401c682b5e13d4ab3864da21efb Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 20 May 2011 18:11:31 +0100
Subject: [PATCH 157/830] configure: make executable again

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 configure | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 configure

diff --git a/configure b/configure
old mode 100644
new mode 100755

From 03c804e1ed65f52fd99020e1777103f4fb7abf15 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 20 May 2011 14:56:44 -0400
Subject: [PATCH 158/830] mp3lame: add #include required for AV_RB32 macro.

Fixes compilation with mp3lame enabled.
---
 libavcodec/libmp3lame.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c
index 62b55c9337..68f1bc9e2b 100644
--- a/libavcodec/libmp3lame.c
+++ b/libavcodec/libmp3lame.c
@@ -24,6 +24,7 @@
  * Interface to libmp3lame for mp3 encoding.
  */
 
+#include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "mpegaudio.h"
 #include <lame/lame.h>

From 007f773942e5c8f39e9ad880d8e7e71f8f3bf7d2 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Sat, 21 May 2011 01:11:42 +0200
Subject: [PATCH 159/830] Encoding alac with more than two channels is not
 supported.

---
 libavcodec/alacenc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/alacenc.c b/libavcodec/alacenc.c
index c3a1fdfa03..bb618e16f2 100644
--- a/libavcodec/alacenc.c
+++ b/libavcodec/alacenc.c
@@ -389,6 +389,11 @@ static av_cold int alac_encode_init(AVCodecContext *avctx)
         return -1;
     }
 
+    if(avctx->channels > 2) {
+        av_log(avctx, AV_LOG_ERROR, "channels > 2 not supported\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
     // Set default compression level
     if(avctx->compression_level == FF_COMPRESSION_DEFAULT)
         s->compression_level = 2;

From 94eadee7efc2c5d19ecfe92d36f0556663468080 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 20 Apr 2011 03:25:48 +0200
Subject: [PATCH 160/830] Merge/split side data.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/avcodec.h  |  5 +++
 libavcodec/avpacket.c | 81 ++++++++++++++++++++++++++++++++++++++++++-
 libavcodec/utils.c    |  4 +--
 libavformat/utils.c   |  2 ++
 4 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 99c349ed15..a25612d2f9 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3229,6 +3229,11 @@ uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
 uint8_t* av_packet_get_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
                                  int *size);
 
+int av_packet_merge_side_data(AVPacket *pkt);
+
+int av_packet_split_side_data(AVPacket *pkt);
+
+
 /* resample.c */
 
 struct ReSampleContext;
diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index ada73a598c..ff34285b48 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c
@@ -21,7 +21,7 @@
 
 #include "avcodec.h"
 #include "libavutil/avassert.h"
-
+#include "bytestream.h"
 
 void av_destruct_packet_nofree(AVPacket *pkt)
 {
@@ -196,3 +196,82 @@ uint8_t* av_packet_get_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
     }
     return NULL;
 }
+
+#define FF_MERGE_MARKER 0x8c4d9d108e25e9feULL
+/* Append all side data to pkt->data as trailers and drop the side data array; returns 1 if merged, 0 if there was no side data, AVERROR(EINVAL)/AVERROR(ENOMEM) on failure */
+int av_packet_merge_side_data(AVPacket *pkt){
+    if(pkt->side_data_elems){
+        int i;
+        uint8_t *p;
+        uint64_t size= pkt->size + 8LL + FF_INPUT_BUFFER_PADDING_SIZE; /* payload + 8-byte marker + padding */
+        AVPacket old= *pkt;
+        for (i=0; i<old.side_data_elems; i++) {
+            size += old.side_data[i].size + 5LL; /* element data + 4-byte size + 1-byte type */
+        }
+        if (size > INT_MAX)
+            return AVERROR(EINVAL);
+        p = av_malloc(size);
+        if (!p)
+            return AVERROR(ENOMEM);
+        pkt->data = p;
+        pkt->destruct = av_destruct_packet;
+        pkt->size = size - FF_INPUT_BUFFER_PADDING_SIZE;
+        bytestream_put_buffer(&p, old.data, old.size);
+        for (i=old.side_data_elems-1; i>=0; i--) { /* last element is written first, element 0 ends up next to the marker */
+            bytestream_put_buffer(&p, old.side_data[i].data, old.side_data[i].size);
+            bytestream_put_be32(&p, old.side_data[i].size);
+            *p++ = old.side_data[i].type | ((i==old.side_data_elems-1)*128); /* bit 7 flags the element written first */
+        }
+        bytestream_put_be64(&p, FF_MERGE_MARKER);
+        av_assert0(p-pkt->data == pkt->size);
+        memset(p, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+        av_free_packet(&old);
+        pkt->side_data_elems = 0;
+        pkt->side_data = NULL;
+        return 1;
+    }
+    return 0;
+}
+/* Reverse av_packet_merge_side_data(): returns 1 if side data was split off, 0 if there is nothing (valid) to split, AVERROR(ENOMEM) on allocation failure */
+int av_packet_split_side_data(AVPacket *pkt){
+    if (!pkt->side_data_elems && pkt->size >12 && AV_RB64(pkt->data + pkt->size - 8) == FF_MERGE_MARKER){
+        int i;
+        unsigned int size;
+        uint8_t *p;
+        if (av_dup_packet(pkt) < 0)
+            return AVERROR(ENOMEM);
+        p= pkt->data + pkt->size - 8 - 5; /* only now: av_dup_packet() may have replaced pkt->data */
+        for (i=1; ; i++){ /* pass 1: validate every trailer and count the elements */
+            size = AV_RB32(p);
+            if (size>INT_MAX || p - pkt->data <= size)
+                return 0;
+            if (p[4]&128)
+                break;
+            if (p - pkt->data < size+5) /* the next trailer must still lie inside the buffer */
+                return 0;
+            p-= size+5;
+        }
+        pkt->side_data = av_malloc(i * sizeof(*pkt->side_data));
+        if (!pkt->side_data)
+            return AVERROR(ENOMEM);
+        p= pkt->data + pkt->size - 8 - 5;
+        for (i=0; ; i++){ /* pass 2: copy each element out and shrink pkt->size */
+            size= AV_RB32(p);
+            av_assert0(size<=INT_MAX && p - pkt->data > size);
+            pkt->side_data[i].data = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
+            pkt->side_data[i].size = size;
+            pkt->side_data[i].type = p[4]&127;
+            if (!pkt->side_data[i].data)
+                return AVERROR(ENOMEM);
+            memcpy(pkt->side_data[i].data, p-size, size);
+            pkt->size -= size + 5;
+            if(p[4]&128)
+                break;
+            p-= size+5;
+        }
+        pkt->size -= 8;
+        pkt->side_data_elems = i+1;
+        return 1;
+    }
+    return 0;
+}
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index b61f9b70ea..aecfb8af33 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -733,9 +733,9 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
     if((avctx->coded_width||avctx->coded_height) && av_image_check_size(avctx->coded_width, avctx->coded_height, 0, avctx))
         return -1;
 
-    avctx->pkt = avpkt;
-
     if((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type&FF_THREAD_FRAME)){
+        av_packet_split_side_data(avpkt);
+        avctx->pkt = avpkt;
         if (HAVE_PTHREADS && avctx->active_thread_type&FF_THREAD_FRAME)
              ret = ff_thread_decode_frame(avctx, picture, got_picture_ptr,
                                           avpkt);
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 3509620c20..8d9f9799cc 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -672,6 +672,8 @@ int av_read_packet(AVFormatContext *s, AVPacket *pkt)
                     s->streams[i]->request_probe = -1;
             continue;
         }
+
+        av_packet_merge_side_data(pkt);
         st= s->streams[pkt->stream_index];
 
         switch(st->codec->codec_type){

From 5c08c7b2151409c3b7a05e54fe46834deedff119 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 21 May 2011 02:17:10 +0200
Subject: [PATCH 161/830] Add a flag to disable side data merging.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/avformat.h | 2 +-
 libavformat/options.c  | 1 +
 libavformat/utils.c    | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 2574bbc424..991b0a4fd5 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -731,7 +731,7 @@ typedef struct AVFormatContext {
 #define AVFMT_FLAG_MP4A_LATM    0x0080 ///< Enable RTP MP4A-LATM payload
 #define AVFMT_FLAG_SORT_DTS    0x10000 ///< try to interleave outputted packets by dts (using this flag can slow demuxing down)
 #define AVFMT_FLAG_PRIV_OPT    0x20000 ///< Enable use of private options by delaying codec open (this could be made default once all code is converted)
-
+#define AVFMT_FLAG_KEEP_SIDE_DATA 0x40000 ///< Don't merge side data but keep it separate.
     int loop_input;
 
     /**
diff --git a/libavformat/options.c b/libavformat/options.c
index 82be8487eb..5e0e3a428d 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -51,6 +51,7 @@ static const AVOption options[]={
 {"igndts", "ignore dts", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_IGNDTS }, INT_MIN, INT_MAX, D, "fflags"},
 {"rtphint", "add rtp hinting", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_RTP_HINT }, INT_MIN, INT_MAX, E, "fflags"},
 {"sortdts", "try to interleave outputted packets by dts", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_SORT_DTS }, INT_MIN, INT_MAX, D, "fflags"},
+{"keepside", "dont merge side data", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_KEEP_SIDE_DATA }, INT_MIN, INT_MAX, D, "fflags"},
 {"latm", "enable RTP MP4A-LATM payload", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_MP4A_LATM }, INT_MIN, INT_MAX, E, "fflags"},
 {"analyzeduration", "how many microseconds are analyzed to estimate duration", OFFSET(max_analyze_duration), FF_OPT_TYPE_INT, {.dbl = 5*AV_TIME_BASE }, 0, INT_MAX, D},
 {"cryptokey", "decryption key", OFFSET(key), FF_OPT_TYPE_BINARY, {.dbl = 0}, 0, 0, D},
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 8d9f9799cc..c5e570028e 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -673,7 +673,8 @@ int av_read_packet(AVFormatContext *s, AVPacket *pkt)
             continue;
         }
 
-        av_packet_merge_side_data(pkt);
+        if(!(s->flags & AVFMT_FLAG_KEEP_SIDE_DATA))
+            av_packet_merge_side_data(pkt);
         st= s->streams[pkt->stream_index];
 
         switch(st->codec->codec_type){

From 0424e052f83adc422d8a746e3cdc5ab6bc28679e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 21 May 2011 03:11:50 +0200
Subject: [PATCH 162/830]     Merge remote-tracking branch 'ffmpeg-mt/master'

    * ffmpeg-mt/master:
      Update todo.
      h264: add an assert that copied pictures are valid picture pointers
      valgrind-check: run with 1 and 3 threads
      h264: When decoding a packet with multiple PPS/SPS, don't start the next thread until all of them have been read
      Allow some pictures to be released earlier after 51ead6d2c40c5defdd211f435aec49b19f5f6a18
      h264: fix slice threading MC reading uninitialized frame edges.

    Please see ffmpeg-mt for a list of authors of these changes.

    Conflicts:
        libavcodec/h264.c
        mt-work/valgrind-check.sh

    Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/h264.c         | 55 ++++++++++++++++++++++++++++++---------
 libavcodec/mpegvideo.c    |  4 +--
 mt-work/todo.txt          | 14 +++++-----
 mt-work/valgrind-check.sh |  4 ++-
 4 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 3068db8d85..3a1f821d04 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -312,7 +312,6 @@ static void chroma_dc_dct_c(DCTELEM *block){
 }
 #endif
 
-
 static void free_tables(H264Context *h, int free_rbsp){
     int i;
     H264Context *hx;
@@ -612,11 +611,15 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
     return 0;
 }
 
+#define IN_RANGE(a, b, size) (((const uint8_t*)(a) >= (const uint8_t*)(b)) && ((const uint8_t*)(a) < (const uint8_t*)(b)+(size))) /* size is in bytes: compare as byte pointers so it is not scaled by sizeof(*(b)) */
 static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
 {
     int i;
 
     for (i=0; i<count; i++){
+        assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
+                IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
+                !from[i]));
         to[i] = REBASE_PICTURE(from[i], new_base, old_base);
     }
 }
@@ -796,8 +799,10 @@ int ff_h264_frame_start(H264Context *h){
   * This includes finding the next displayed frame.
   *
   * @param h h264 master context
+  * @param setup_finished enough NALs have been read that we can call
+  * ff_thread_finish_setup()
   */
-static void decode_postinit(H264Context *h){
+static void decode_postinit(H264Context *h, int setup_finished){
     MpegEncContext * const s = &h->s;
     Picture *out = s->current_picture_ptr;
     Picture *cur = s->current_picture_ptr;
@@ -809,10 +814,11 @@ static void decode_postinit(H264Context *h){
     if (h->next_output_pic) return;
 
     if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
-        //FIXME this allows the next thread to start once we encounter the first field of a PAFF packet
-        //This works if the next packet contains the second field. It does not work if both fields are
-        //in the same packet.
-        //ff_thread_finish_setup(s->avctx);
+        //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
+        //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
+        //to find this yet, so we assume the worst for now.
+        //if (setup_finished)
+        //    ff_thread_finish_setup(s->avctx);
         return;
     }
 
@@ -943,7 +949,8 @@ static void decode_postinit(H264Context *h){
         av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
     }
 
-    ff_thread_finish_setup(s->avctx);
+    if (setup_finished)
+        ff_thread_finish_setup(s->avctx);
 }
 
 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
@@ -2310,7 +2317,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     }
 
     //FIXME: fix draw_edges+PAFF+frame threads
-    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type&FF_THREAD_FRAME)) ? 0 : 16;
+    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
 
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
@@ -2892,10 +2899,13 @@ static void execute_decode_slices(H264Context *h, int context_count){
 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
     MpegEncContext * const s = &h->s;
     AVCodecContext * const avctx= s->avctx;
-    int buf_index=0;
     H264Context *hx; ///< thread context
-    int context_count = 0;
-    int next_avc= h->is_avc ? 0 : buf_size;
+    int buf_index;
+    int context_count;
+    int next_avc;
+    int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
+    int nals_needed=0; ///< number of NALs that need decoding before the next frame thread starts
+    int nal_index;
 
     h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
 #if 0
@@ -2911,6 +2921,11 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
         ff_h264_reset_sei(h);
     }
 
+    for(;pass <= 1;pass++){
+        buf_index = 0;
+        context_count = 0;
+        next_avc = h->is_avc ? 0 : buf_size;
+        nal_index = 0;
     for(;;){
         int consumed;
         int dst_length;
@@ -2969,6 +2984,19 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
         }
 
         buf_index += consumed;
+        nal_index++;
+
+        if(pass == 0) {
+            // packets can sometimes contain multiple PPS/SPS
+            // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
+            // if so, when frame threading we can't start the next thread until we've read all of them
+            switch (hx->nal_unit_type) {
+                case NAL_SPS:
+                case NAL_PPS:
+                    nals_needed = nal_index;
+            }
+            continue;
+        }
 
         //FIXME do not discard SEI id
         if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0)
@@ -2998,7 +3026,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
 
             if (h->current_slice == 1) {
                 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
-                    decode_postinit(h);
+                    decode_postinit(h, nal_index >= nals_needed);
                 }
 
                 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
@@ -3115,6 +3143,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
             goto again;
         }
     }
+    }
     if(context_count)
         execute_decode_slices(h, context_count);
     return buf_index;
@@ -3190,7 +3219,7 @@ static int decode_frame(AVCodecContext *avctx,
 
     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
 
-        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h);
+        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);
 
         field_end(h, 0);
 
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 6bb35f9f39..0a0a11ebc9 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -316,7 +316,7 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){
     s->prev_pict_types[0]= s->dropable ? AV_PICTURE_TYPE_B : s->pict_type;
     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == AV_PICTURE_TYPE_B)
         pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.
-    pic->owner2 = s;
+    pic->owner2 = NULL;
 
     return 0;
 fail: //for the FF_ALLOCZ_OR_GOTO macro
@@ -955,7 +955,7 @@ void ff_release_unused_pictures(MpegEncContext *s, int remove_current)
     /* release non reference frames */
     for(i=0; i<s->picture_count; i++){
         if(s->picture[i].data[0] && !s->picture[i].reference
-           && s->picture[i].owner2 == s
+           && (!s->picture[i].owner2 || s->picture[i].owner2 == s)
            && (remove_current || &s->picture[i] != s->current_picture_ptr)
            /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
             free_frame_buffer(s, &s->picture[i]);
diff --git a/mt-work/todo.txt b/mt-work/todo.txt
index 013853e3ae..678d213d8c 100644
--- a/mt-work/todo.txt
+++ b/mt-work/todo.txt
@@ -1,7 +1,7 @@
 Todo
 
 -- For other people
-- Multithread vp8 or vc1.
+- Multithread vc1.
 - Multithread an intra codec like mjpeg (trivial).
 - Fix mpeg1 (see below).
 - Try the first three items under Optimization.
@@ -18,11 +18,13 @@ work.) In general testing error paths should be done more.
 bugs in vsync in ffmpeg.c, which are currently obscuring real failures.
 
 h264:
-- Files split at the wrong NAL unit don't (and can't)
-be decoded with threads (e.g. TS split so PPS is after
-the frame, PAFF with two fields in a packet). Scan the
-packet at the start of decode and don't finish setup
-until all PPS/SPS have been encountered.
+- Files that aren't parsed (e.g. mp4) and contain PAFF with two
+field pictures in the same packet are not optimal. Modify the
+nals_needed check so that the second field's first slice is
+considered as needed, then uncomment the FIXME code in decode_postinit.
+Ex: http://astrange.ithinksw.net/ffmpeg/mt-samples/PAFF-Chalet-Tire.mp4
+- The conformance sample MR3_TANDBERG_B.264 has problems (allocated picture overflow).
+- One 10-bit sample has problems.
 
 mpeg4:
 - Packed B-frames need to be explicitly split up
diff --git a/mt-work/valgrind-check.sh b/mt-work/valgrind-check.sh
index dc3833abb6..276327a76a 100644
--- a/mt-work/valgrind-check.sh
+++ b/mt-work/valgrind-check.sh
@@ -1,3 +1,5 @@
 #!/bin/bash
 
-valgrind --leak-check=full ./ffmpeg_g -threads 3 -vsync 0 -y -t 30 -i "$1" -an -f framecrc /dev/null
\ No newline at end of file
+valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads 1 -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null
+
+valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads 3 -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null

From 1b30e4f5865260323da5232174fc68d6cc283f45 Mon Sep 17 00:00:00 2001
From: Gianluigi Tiesi <mplayer@netfarm.it>
Date: Sat, 21 May 2011 03:36:26 +0200
Subject: [PATCH 163/830] preset dir for win32

---
 cmdutils.c      | 17 +++++++++++++++++
 cmdutils.h      |  3 ++-
 doc/ffmpeg.texi |  1 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/cmdutils.c b/cmdutils.c
index 5a0c8f378f..8265fbcd0f 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -849,6 +849,23 @@ FILE *get_preset_file(char *filename, size_t filename_size,
         av_strlcpy(filename, preset_name, filename_size);
         f = fopen(filename, "r");
     } else {
+#ifdef _WIN32
+        char datadir[MAX_PATH], *ls;
+        base[2] = NULL;
+
+        if (GetModuleFileNameA(GetModuleHandleA(NULL), datadir, sizeof(datadir) - 1))
+        {
+            for (ls = datadir; ls < datadir + strlen(datadir); ls++)
+                if (*ls == '\\') *ls = '/';
+
+            if (ls = strrchr(datadir, '/'))
+            {
+                *ls = 0;
+                strncat(datadir, "/ffpresets",  sizeof(datadir) - 1 - strlen(datadir));
+                base[2] = datadir;
+            }
+        }
+#endif
         for (i = 0; i < 3 && !f; i++) {
             if (!base[i])
                 continue;
diff --git a/cmdutils.h b/cmdutils.h
index 5a9314681a..9e5827fc2b 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -243,7 +243,8 @@ int read_file(const char *filename, char **bufptr, size_t *size);
  * If is_path is non-zero, look for the file in the path preset_name.
  * Otherwise search for a file named arg.ffpreset in the directories
  * $FFMPEG_DATADIR (if set), $HOME/.ffmpeg, and in the datadir defined
- * at configuration time, in that order. If no such file is found and
+ * at configuration time or in a "ffpresets" folder alongside the executable
+ * on win32, in that order. If no such file is found and
  * codec_name is defined, then search for a file named
  * codec_name-preset_name.ffpreset in the above-mentioned directories.
  *
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 77fde18564..fe9e9a9e92 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -816,6 +816,7 @@ following rules:
 First ffmpeg searches for a file named @var{arg}.ffpreset in the
 directories @file{$FFMPEG_DATADIR} (if set), and @file{$HOME/.ffmpeg}, and in
 the datadir defined at configuration time (usually @file{PREFIX/share/ffmpeg})
+or in a @file{ffpresets} folder along the executable on win32,
 in that order. For example, if the argument is @code{libx264-max}, it will
 search for the file @file{libx264-max.ffpreset}.
 

From 0eed5016a27ec2f6990c426e79eb28fbf370f613 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Fri, 20 May 2011 14:59:47 +0300
Subject: [PATCH 164/830] avoptions: Support getting flag values using
 av_get_int

---
 libavutil/opt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavutil/opt.c b/libavutil/opt.c
index 720ad99e8e..5976c3377e 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -261,7 +261,7 @@ static int av_get_number(void *obj, const char *name, const AVOption **o_out, do
 {
     const AVOption *o= av_find_opt(obj, name, NULL, 0, 0);
     void *dst;
-    if (!o || o->offset<=0)
+    if (!o || (o->offset<=0 && o->type != FF_OPT_TYPE_CONST))
         goto error;
 
     dst= ((uint8_t*)obj) + o->offset;
@@ -277,6 +277,7 @@ static int av_get_number(void *obj, const char *name, const AVOption **o_out, do
     case FF_OPT_TYPE_RATIONAL:  *intnum= ((AVRational*)dst)->num;
                                 *den   = ((AVRational*)dst)->den;
                                                         return 0;
+    case FF_OPT_TYPE_CONST:     *intnum= o->default_val.dbl;return 0;
     }
 error:
     *den=*intnum=0;

From ab1c19efc59cbcb1ff1b0c076cde01b87837f21e Mon Sep 17 00:00:00 2001
From: Maksym Veremeyenko <verem@m1stereo.tv>
Date: Sat, 21 May 2011 04:39:20 +0200
Subject: [PATCH 165/830] fix ffserver's SIGSEGV

---
 ffmpeg.c   | 1 +
 ffserver.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/ffmpeg.c b/ffmpeg.c
index fb644adc18..38297cc7f4 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -703,6 +703,7 @@ static int read_ffserver_streams(AVFormatContext *s, const char *filename)
         return err;
     /* copy stream format */
     s->nb_streams = 0;
+    s->streams = av_mallocz(sizeof(*s->streams) * ic->nb_streams);
     for(i=0;i<ic->nb_streams;i++) {
         AVStream *st;
         AVCodec *codec;
diff --git a/ffserver.c b/ffserver.c
index 81c9fcf448..517cb6f3f3 100644
--- a/ffserver.c
+++ b/ffserver.c
@@ -2229,6 +2229,7 @@ static int http_prepare_data(HTTPContext *c)
         av_metadata_set2(&c->fmt_ctx.metadata, "copyright", c->stream->copyright, 0);
         av_metadata_set2(&c->fmt_ctx.metadata, "title"    , c->stream->title    , 0);
 
+        c->fmt_ctx.streams = av_mallocz(sizeof(*c->fmt_ctx.streams) * c->stream->nb_streams);
         for(i=0;i<c->stream->nb_streams;i++) {
             AVStream *st;
             AVStream *src;

From 840238b8706ea2ca740cf4b49bffd8ae196352f9 Mon Sep 17 00:00:00 2001
From: Mike William <mike@mikebwilliams.com>
Date: Sat, 21 May 2011 04:50:59 +0200
Subject: [PATCH 166/830] ffserver: don't just crash

With changes from ubitux.
---
 ffmpeg.c   | 2 ++
 ffserver.c | 9 ++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 38297cc7f4..4d519c7ac9 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -713,6 +713,8 @@ static int read_ffserver_streams(AVFormatContext *s, const char *filename)
         // FIXME: a more elegant solution is needed
         st = av_mallocz(sizeof(AVStream));
         memcpy(st, ic->streams[i], sizeof(AVStream));
+        st->info = av_malloc(sizeof(*st->info));
+        memcpy(st->info, ic->streams[i]->info, sizeof(*st->info));
         st->codec = avcodec_alloc_context();
         if (!st->codec) {
             print_error(filename, AVERROR(ENOMEM));
diff --git a/ffserver.c b/ffserver.c
index 517cb6f3f3..4c2c5cf890 100644
--- a/ffserver.c
+++ b/ffserver.c
@@ -3386,6 +3386,9 @@ static int rtp_new_av_stream(HTTPContext *c,
     if (!st)
         goto fail;
     ctx->nb_streams = 1;
+    ctx->streams = av_mallocz(sizeof(*ctx->streams) * ctx->nb_streams);
+    if (!ctx->streams)
+        goto fail;
     ctx->streams[0] = st;
 
     if (!c->stream->feed ||
@@ -3766,11 +3769,7 @@ static void build_feed_streams(void)
             }
             s->oformat = feed->fmt;
             s->nb_streams = feed->nb_streams;
-            for(i=0;i<s->nb_streams;i++) {
-                AVStream *st;
-                st = feed->streams[i];
-                s->streams[i] = st;
-            }
+            s->streams = feed->streams;
             av_set_parameters(s, NULL);
             if (av_write_header(s) < 0) {
                 http_log("Container doesn't supports the required parameters\n");

From 21bbca5b4422ddd10363bf1d8494564c54639b39 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 12 May 2011 18:18:54 +0200
Subject: [PATCH 167/830] add changelog entries for 0.7_beta2

---
 Changelog | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Changelog b/Changelog
index f78150e03e..b05755503c 100644
--- a/Changelog
+++ b/Changelog
@@ -4,11 +4,15 @@ releases are sorted from youngest to oldest.
 
 version <next>:
 
+
+version 0.7_beta2:
+
 - Lots of deprecated API cruft removed
 - fft and imdct optimizations for AVX (Sandy Bridge) processors
 - DPX image encoder
 - SMPTE 302M AES3 audio decoder
 - Remove support for quitting ffmpeg with 'q', ctrl+c should be used.
+- 9bit and 10bit per sample support in the h264 decoder
 
 
 version 0.7_beta1:

From f8c49d02b0f17c3011712ec4638ec4c6c76e7831 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Sat, 21 May 2011 11:08:44 +0200
Subject: [PATCH 168/830] Fix channel_layout documentation.

libavformat/riff.c has been writing channel_layout for as long as
the field has existed.
---
 libavcodec/avcodec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index a25612d2f9..fdc86bb7c0 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2611,7 +2611,7 @@ typedef struct AVCodecContext {
     /**
      * Audio channel layout.
      * - encoding: set by user.
-     * - decoding: set by libavcodec.
+     * - decoding: set by user, may be overwritten by libavcodec.
      */
     int64_t channel_layout;
 

From cdca7c378ed46cf67a7583a102ba1b2b91d00b9c Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 13 May 2011 10:24:31 +0300
Subject: [PATCH 169/830] svq3: Do initialization after parsing the extradata

If done before, some parameters aren't known yet.

With svq3/rtp, initializing before some parameters are known
can lead to calling av_malloc(0), which on OS X currently returns
broken pointers.
---
 libavcodec/svq3.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 1e4c962ba9..bc0215eff5 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -804,20 +804,11 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
     avctx->pix_fmt = avctx->codec->pix_fmts[0];
 
     if (!s->context_initialized) {
-        s->width  = avctx->width;
-        s->height = avctx->height;
         h->halfpel_flag      = 1;
         h->thirdpel_flag     = 1;
         h->unknown_svq3_flag = 0;
         h->chroma_qp[0]      = h->chroma_qp[1] = 4;
 
-        if (MPV_common_init(s) < 0)
-            return -1;
-
-        h->b_stride = 4*s->mb_width;
-
-        ff_h264_alloc_tables(h);
-
         /* prowl for the "SEQH" marker in the extradata */
         extradata = (unsigned char *)avctx->extradata;
         for (m = 0; m < avctx->extradata_size; m++) {
@@ -904,6 +895,16 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
 #endif
             }
         }
+
+        s->width  = avctx->width;
+        s->height = avctx->height;
+
+        if (MPV_common_init(s) < 0)
+            return -1;
+
+        h->b_stride = 4*s->mb_width;
+
+        ff_h264_alloc_tables(h);
     }
 
     return 0;

From 7d4c4394b5c94a665cc807fb8b92ea153b6225b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= <ubitux@gmail.com>
Date: Thu, 5 May 2011 21:25:46 +0200
Subject: [PATCH 170/830] swscale: point out an alternative to sws_getContext

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libswscale/swscale.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 5d0c9e9049..dd4de76b0a 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -187,6 +187,7 @@ void sws_freeContext(struct SwsContext *swsContext);
  * @return a pointer to an allocated context, or NULL in case of error
  * @note this function is to be removed after a saner alternative is
  *       written
+ * @deprecated Use sws_getCachedContext() instead.
  */
 struct SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat,
                                   int dstW, int dstH, enum PixelFormat dstFormat,

From 9e2dabed4a7bf21e3e0c0f4ddc895f8ed90fa839 Mon Sep 17 00:00:00 2001
From: Can Wu <wu.canus@gmail.com>
Date: Sat, 14 May 2011 17:27:31 +0800
Subject: [PATCH 171/830] avio: check AVIOContext malloc failure

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/aviobuf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
index 2b14d48ff5..fa63ddf2b9 100644
--- a/libavformat/aviobuf.c
+++ b/libavformat/aviobuf.c
@@ -113,6 +113,8 @@ AVIOContext *avio_alloc_context(
                   int64_t (*seek)(void *opaque, int64_t offset, int whence))
 {
     AVIOContext *s = av_mallocz(sizeof(AVIOContext));
+    if (!s)
+        return NULL;
     ffio_init_context(s, buffer, buffer_size, write_flag, opaque,
                   read_packet, write_packet, seek);
     return s;

From d49051e0742c09345495ae0486c3601a15222ac4 Mon Sep 17 00:00:00 2001
From: Can Wu <wu.canus@gmail.com>
Date: Sat, 14 May 2011 17:34:28 +0800
Subject: [PATCH 172/830] avio: document that buffer must be created with
 av_malloc() and friends

Otherwise a later buffer resize in ffio_set_buf_size() will abort.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/avio.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/avio.h b/libavformat/avio.h
index b98137b83b..e07e3c3c92 100644
--- a/libavformat/avio.h
+++ b/libavformat/avio.h
@@ -373,6 +373,7 @@ void avio_set_interrupt_cb(int (*interrupt_cb)(void));
  * freed with av_free().
  *
  * @param buffer Memory block for input/output operations via AVIOContext.
+ *        The buffer must be allocated with av_malloc() and friends.
  * @param buffer_size The buffer size is very important for performance.
  *        For protocols with fixed blocksize it should be set to this blocksize.
  *        For others a typical size is a cache page, e.g. 4kb.

From 153382e1b6b428a1dcb8dc3f06f64a6959d722c5 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 17 May 2011 16:58:04 +0200
Subject: [PATCH 173/830] multiple inclusion guard cleanup

Add missing multiple inclusion guards; clean up #endif comments;
add missing library prefixes; keep guard names consistent.
---
 cmdutils.h                         | 6 +++---
 ffserver.h                         | 7 ++++---
 libavcodec/aac_tablegen.h          | 6 +++---
 libavcodec/aac_tablegen_decl.h     | 6 +++---
 libavcodec/amrwbdata.h             | 2 +-
 libavcodec/arm/asm-offsets.h       | 2 +-
 libavcodec/arm/dsputil_arm.h       | 2 +-
 libavcodec/arm/mpegvideo_arm.h     | 2 +-
 libavcodec/arm/vp56_arith.h        | 2 +-
 libavcodec/arm/vp8.h               | 2 +-
 libavcodec/cavsdsp.h               | 2 +-
 libavcodec/cbrt_tablegen.h         | 6 +++---
 libavcodec/cga_data.h              | 2 +-
 libavcodec/dct.h                   | 2 +-
 libavcodec/dctref.h                | 2 +-
 libavcodec/dv_tablegen.h           | 6 +++---
 libavcodec/flv.h                   | 3 +--
 libavcodec/gsmdec_data.h           | 6 +++---
 libavcodec/h263.h                  | 3 ++-
 libavcodec/kbdwin.h                | 2 +-
 libavcodec/motionpixels_tablegen.h | 6 +++---
 libavcodec/mpeg4video.h            | 2 +-
 libavcodec/mpegaudio_tablegen.h    | 6 +++---
 libavcodec/mpegaudiodsp.h          | 2 +-
 libavcodec/msgsmdec.h              | 6 +++---
 libavcodec/nellymoser.h            | 2 +-
 libavcodec/opt.h                   | 2 +-
 libavcodec/pcm_tablegen.h          | 6 +++---
 libavcodec/qdm2_tablegen.h         | 6 +++---
 libavcodec/rdft.h                  | 2 +-
 libavcodec/sh4/dsputil_sh4.h       | 2 +-
 libavcodec/sinewin.h               | 2 +-
 libavcodec/sinewin_tablegen.h      | 5 +++++
 libavcodec/sparc/dsputil_vis.h     | 2 +-
 libavcodec/targa.h                 | 5 +++++
 libavcodec/vp8.h                   | 2 +-
 libavcodec/vp8data.h               | 2 +-
 libavcodec/x86/fft.h               | 2 +-
 libavcodec/xvmc_internal.h         | 6 +++---
 libavfilter/avfilter.h             | 2 +-
 libavfilter/avfiltergraph.h        | 2 +-
 libavfilter/internal.h             | 2 +-
 libavfilter/vsrc_buffer.h          | 4 ++++
 libavformat/avio_internal.h        | 2 +-
 libavformat/ffmeta.h               | 6 +++---
 libavformat/mms.h                  | 3 ++-
 libavformat/spdif.h                | 5 +++++
 libavformat/url.h                  | 2 +-
 libavformat/version.h              | 2 +-
 libavutil/avassert.h               | 2 +-
 libavutil/cpu.h                    | 2 +-
 51 files changed, 97 insertions(+), 76 deletions(-)

diff --git a/cmdutils.h b/cmdutils.h
index c99c8653fa..3bb1cd616b 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -19,8 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef FFMPEG_CMDUTILS_H
-#define FFMPEG_CMDUTILS_H
+#ifndef LIBAV_CMDUTILS_H
+#define LIBAV_CMDUTILS_H
 
 #include <stdint.h>
 
@@ -295,4 +295,4 @@ extern AVFilter ffsink;
 int get_filtered_video_frame(AVFilterContext *sink, AVFrame *frame,
                              AVFilterBufferRef **picref, AVRational *pts_tb);
 
-#endif /* FFMPEG_CMDUTILS_H */
+#endif /* LIBAV_CMDUTILS_H */
diff --git a/ffserver.h b/ffserver.h
index 868b83bca4..43bc79c2c6 100644
--- a/ffserver.h
+++ b/ffserver.h
@@ -18,11 +18,12 @@
  * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
-#ifndef FFMPEG_FFSERVER_H
-#define FFMPEG_FFSERVER_H
+
+#ifndef LIBAV_FFSERVER_H
+#define LIBAV_FFSERVER_H
 
 /* interface between ffserver and modules */
 
 void ffserver_module_init(void);
 
-#endif /* FFMPEG_FFSERVER_H */
+#endif /* LIBAV_FFSERVER_H */
diff --git a/libavcodec/aac_tablegen.h b/libavcodec/aac_tablegen.h
index 98895694a8..4486e1a9fc 100644
--- a/libavcodec/aac_tablegen.h
+++ b/libavcodec/aac_tablegen.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AAC_TABLEGEN_H
-#define AAC_TABLEGEN_H
+#ifndef AVCODEC_AAC_TABLEGEN_H
+#define AVCODEC_AAC_TABLEGEN_H
 
 #include "aac_tablegen_decl.h"
 
@@ -40,4 +40,4 @@ void ff_aac_tableinit(void)
 }
 #endif /* CONFIG_HARDCODED_TABLES */
 
-#endif /* AAC_TABLEGEN_H */
+#endif /* AVCODEC_AAC_TABLEGEN_H */
diff --git a/libavcodec/aac_tablegen_decl.h b/libavcodec/aac_tablegen_decl.h
index ce4ecb5bf1..496ca0c677 100644
--- a/libavcodec/aac_tablegen_decl.h
+++ b/libavcodec/aac_tablegen_decl.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AAC_TABLEGEN_DECL_H
-#define AAC_TABLEGEN_DECL_H
+#ifndef AVCODEC_AAC_TABLEGEN_DECL_H
+#define AVCODEC_AAC_TABLEGEN_DECL_H
 
 #if CONFIG_HARDCODED_TABLES
 #define ff_aac_tableinit()
@@ -31,4 +31,4 @@ void ff_aac_tableinit(void);
 extern       float ff_aac_pow2sf_tab[428];
 #endif /* CONFIG_HARDCODED_TABLES */
 
-#endif /* AAC_TABLEGEN_DECL_H */
+#endif /* AVCODEC_AAC_TABLEGEN_DECL_H */
diff --git a/libavcodec/amrwbdata.h b/libavcodec/amrwbdata.h
index f4db99e208..5421c23afb 100644
--- a/libavcodec/amrwbdata.h
+++ b/libavcodec/amrwbdata.h
@@ -1887,4 +1887,4 @@ static const uint16_t cf_sizes_wb[] = {
     40 /// SID/comfort noise frame
 };
 
-#endif
+#endif /* AVCODEC_AMRWBDATA_H */
diff --git a/libavcodec/arm/asm-offsets.h b/libavcodec/arm/asm-offsets.h
index 43c16301c0..110d33dbb5 100644
--- a/libavcodec/arm/asm-offsets.h
+++ b/libavcodec/arm/asm-offsets.h
@@ -36,4 +36,4 @@
 #define H263_AIC                 0xf0
 #define INTER_SCANTAB_RASTER_END 0x138
 
-#endif
+#endif /* AVCODEC_ARM_ASM_OFFSETS_H */
diff --git a/libavcodec/arm/dsputil_arm.h b/libavcodec/arm/dsputil_arm.h
index 14d9836114..6d7e6a6d16 100644
--- a/libavcodec/arm/dsputil_arm.h
+++ b/libavcodec/arm/dsputil_arm.h
@@ -30,4 +30,4 @@ void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx);
 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
 void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
 
-#endif
+#endif /* AVCODEC_ARM_DSPUTIL_H */
diff --git a/libavcodec/arm/mpegvideo_arm.h b/libavcodec/arm/mpegvideo_arm.h
index 0812ca1657..a36da6112b 100644
--- a/libavcodec/arm/mpegvideo_arm.h
+++ b/libavcodec/arm/mpegvideo_arm.h
@@ -24,4 +24,4 @@
 void MPV_common_init_iwmmxt(MpegEncContext *s);
 void MPV_common_init_armv5te(MpegEncContext *s);
 
-#endif
+#endif /* AVCODEC_ARM_MPEGVIDEO_H */
diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h
index f5dbd1d4c7..0591d614a9 100644
--- a/libavcodec/arm/vp56_arith.h
+++ b/libavcodec/arm/vp56_arith.h
@@ -89,4 +89,4 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
 
 #endif
 
-#endif
+#endif /* AVCODEC_ARM_VP56_ARITH_H */
diff --git a/libavcodec/arm/vp8.h b/libavcodec/arm/vp8.h
index 35cdd8b2bf..76a0397a8d 100644
--- a/libavcodec/arm/vp8.h
+++ b/libavcodec/arm/vp8.h
@@ -26,4 +26,4 @@ int ff_decode_block_coeffs_armv6(VP56RangeCoder *rc, DCTELEM block[16],
                                  int i, uint8_t *token_prob, int16_t qmul[2]);
 #endif
 
-#endif
+#endif /* AVCODEC_ARM_VP8_H */
diff --git a/libavcodec/cavsdsp.h b/libavcodec/cavsdsp.h
index de2f530d83..b1133b7264 100644
--- a/libavcodec/cavsdsp.h
+++ b/libavcodec/cavsdsp.h
@@ -38,4 +38,4 @@ typedef struct CAVSDSPContext {
 void ff_cavsdsp_init(CAVSDSPContext* c, AVCodecContext *avctx);
 void ff_cavsdsp_init_mmx(CAVSDSPContext* c, AVCodecContext *avctx);
 
-#endif
+#endif /* AVCODEC_CAVSDSP_H */
diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h
index 977450c124..01963a3f9d 100644
--- a/libavcodec/cbrt_tablegen.h
+++ b/libavcodec/cbrt_tablegen.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef CBRT_TABLEGEN_H
-#define CBRT_TABLEGEN_H
+#ifndef AVCODEC_CBRT_TABLEGEN_H
+#define AVCODEC_CBRT_TABLEGEN_H
 
 #include <stdint.h>
 #include <math.h>
@@ -48,4 +48,4 @@ static void cbrt_tableinit(void)
 }
 #endif /* CONFIG_HARDCODED_TABLES */
 
-#endif /* CBRT_TABLEGEN_H */
+#endif /* AVCODEC_CBRT_TABLEGEN_H */
diff --git a/libavcodec/cga_data.h b/libavcodec/cga_data.h
index 60f572f9b6..2149cfd2f1 100644
--- a/libavcodec/cga_data.h
+++ b/libavcodec/cga_data.h
@@ -45,4 +45,4 @@ extern const uint32_t ff_ega_palette[64];
  */
 void ff_draw_pc_font(uint8_t *dst, int linesize, const uint8_t *font, int font_height, int ch, int fg, int bg);
 
-#endif
+#endif /* AVCODEC_CGA_DATA_H */
diff --git a/libavcodec/dct.h b/libavcodec/dct.h
index faddaa3d7b..c898856279 100644
--- a/libavcodec/dct.h
+++ b/libavcodec/dct.h
@@ -49,4 +49,4 @@ void ff_dct_end (DCTContext *s);
 
 void ff_dct_init_mmx(DCTContext *s);
 
-#endif
+#endif /* AVCODEC_DCT_H */
diff --git a/libavcodec/dctref.h b/libavcodec/dctref.h
index ffd3533439..ba89abd752 100644
--- a/libavcodec/dctref.h
+++ b/libavcodec/dctref.h
@@ -28,4 +28,4 @@ void ff_ref_fdct(DCTELEM *block);
 void ff_ref_idct(DCTELEM *block);
 void ff_ref_dct_init(void);
 
-#endif
+#endif /* AVCODEC_DCTREF_H */
diff --git a/libavcodec/dv_tablegen.h b/libavcodec/dv_tablegen.h
index 0810f8e7a5..4fa8d91374 100644
--- a/libavcodec/dv_tablegen.h
+++ b/libavcodec/dv_tablegen.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef DV_TABLEGEN_H
-#define DV_TABLEGEN_H
+#ifndef AVCODEC_DV_TABLEGEN_H
+#define AVCODEC_DV_TABLEGEN_H
 
 #include <stdint.h>
 #include "dv_vlc_data.h"
@@ -93,4 +93,4 @@ static void dv_vlc_map_tableinit(void)
 }
 #endif /* CONFIG_HARDCODED_TABLES */
 
-#endif /* DV_TABLEGEN_H */
+#endif /* AVCODEC_DV_TABLEGEN_H */
diff --git a/libavcodec/flv.h b/libavcodec/flv.h
index 84111175c6..3d9a2d5232 100644
--- a/libavcodec/flv.h
+++ b/libavcodec/flv.h
@@ -30,5 +30,4 @@ void ff_flv2_encode_ac_esc(PutBitContext *pb, int slevel, int level, int run, in
 int ff_flv_decode_picture_header(MpegEncContext *s);
 void ff_flv2_decode_ac_esc(GetBitContext *gb, int *level, int *run, int *last);
 
-#endif
-
+#endif /* AVCODEC_FLV_H */
diff --git a/libavcodec/gsmdec_data.h b/libavcodec/gsmdec_data.h
index 32cd01ea7a..b78daa7335 100644
--- a/libavcodec/gsmdec_data.h
+++ b/libavcodec/gsmdec_data.h
@@ -19,8 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef GSMDEC_DATA
-#define GSMDEC_DATA
+#ifndef AVCODEC_GSMDEC_DATA
+#define AVCODEC_GSMDEC_DATA
 
 #include <stdint.h>
 
@@ -44,4 +44,4 @@ typedef struct {
 extern const uint16_t ff_gsm_long_term_gain_tab[4];
 extern const int16_t ff_gsm_dequant_tab[64][8];
 
-#endif
+#endif /* AVCODEC_GSMDEC_DATA */
diff --git a/libavcodec/h263.h b/libavcodec/h263.h
index cdbe44eb90..1dc300709e 100644
--- a/libavcodec/h263.h
+++ b/libavcodec/h263.h
@@ -248,4 +248,5 @@ static inline void memsetw(short *tab, int val, int n)
     for(i=0;i<n;i++)
         tab[i] = val;
 }
-#endif
+
+#endif /* AVCODEC_H263_H */
diff --git a/libavcodec/kbdwin.h b/libavcodec/kbdwin.h
index 5ac2b8ca5a..89b569aa7c 100644
--- a/libavcodec/kbdwin.h
+++ b/libavcodec/kbdwin.h
@@ -32,4 +32,4 @@
  */
 void ff_kbd_window_init(float *window, float alpha, int n);
 
-#endif
+#endif /* AVCODEC_KBDWIN_H */
diff --git a/libavcodec/motionpixels_tablegen.h b/libavcodec/motionpixels_tablegen.h
index 9516dfe579..cbf56c8694 100644
--- a/libavcodec/motionpixels_tablegen.h
+++ b/libavcodec/motionpixels_tablegen.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef MOTIONPIXELS_TABLEGEN_H
-#define MOTIONPIXELS_TABLEGEN_H
+#ifndef AVCODEC_MOTIONPIXELS_TABLEGEN_H
+#define AVCODEC_MOTIONPIXELS_TABLEGEN_H
 
 #include <stdint.h>
 
@@ -88,4 +88,4 @@ static void motionpixels_tableinit(void)
 }
 #endif /* CONFIG_HARDCODED_TABLES */
 
-#endif /* MOTIONPIXELS_TABLEGEN_H */
+#endif /* AVCODEC_MOTIONPIXELS_TABLEGEN_H */
diff --git a/libavcodec/mpeg4video.h b/libavcodec/mpeg4video.h
index 015193e13b..d34e73149c 100644
--- a/libavcodec/mpeg4video.h
+++ b/libavcodec/mpeg4video.h
@@ -196,4 +196,4 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
 
     return ret;
 }
-#endif
+#endif /* AVCODEC_MPEG4VIDEO_H */
diff --git a/libavcodec/mpegaudio_tablegen.h b/libavcodec/mpegaudio_tablegen.h
index 2264b739d2..a222f2c423 100644
--- a/libavcodec/mpegaudio_tablegen.h
+++ b/libavcodec/mpegaudio_tablegen.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef MPEGAUDIO_TABLEGEN_H
-#define MPEGAUDIO_TABLEGEN_H
+#ifndef AVCODEC_MPEGAUDIO_TABLEGEN_H
+#define AVCODEC_MPEGAUDIO_TABLEGEN_H
 
 #include <stdint.h>
 #include <math.h>
@@ -68,4 +68,4 @@ static void mpegaudio_tableinit(void)
 }
 #endif /* CONFIG_HARDCODED_TABLES */
 
-#endif /* MPEGAUDIO_TABLEGEN_H */
+#endif /* AVCODEC_MPEGAUDIO_TABLEGEN_H */
diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h
index 597e2533f5..a47019cc4b 100644
--- a/libavcodec/mpegaudiodsp.h
+++ b/libavcodec/mpegaudiodsp.h
@@ -60,4 +60,4 @@ void ff_mpadsp_apply_window_fixed(int32_t *synth_buf, int32_t *window,
                                   int *dither_state, int16_t *samples,
                                   int incr);
 
-#endif
+#endif /* AVCODEC_MPEGAUDIODSP_H */
diff --git a/libavcodec/msgsmdec.h b/libavcodec/msgsmdec.h
index cf58baaa47..76c87f1bd9 100644
--- a/libavcodec/msgsmdec.h
+++ b/libavcodec/msgsmdec.h
@@ -19,12 +19,12 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef MSGSMDEC_H
-#define MSGSMDEC_H
+#ifndef AVCODEC_MSGSMDEC_H
+#define AVCODEC_MSGSMDEC_H
 
 #include "avcodec.h"
 
 int ff_msgsm_decode_block(AVCodecContext *avctx, int16_t *samples,
                           const uint8_t *buf);
 
-#endif
+#endif /* AVCODEC_MSGSMDEC_H */
diff --git a/libavcodec/nellymoser.h b/libavcodec/nellymoser.h
index 88d9aa6245..027fc7ed23 100644
--- a/libavcodec/nellymoser.h
+++ b/libavcodec/nellymoser.h
@@ -54,4 +54,4 @@ extern const int16_t  ff_nelly_delta_table[32];
 
 void ff_nelly_get_sample_bits(const float *buf, int *bits);
 
-#endif
+#endif /* AVCODEC_NELLYMOSER_H */
diff --git a/libavcodec/opt.h b/libavcodec/opt.h
index e754bb93d8..70de27d192 100644
--- a/libavcodec/opt.h
+++ b/libavcodec/opt.h
@@ -13,4 +13,4 @@
 #include "libavutil/opt.h"
 #endif
 
-#endif
+#endif /* AVCODEC_OPT_H */
diff --git a/libavcodec/pcm_tablegen.h b/libavcodec/pcm_tablegen.h
index 838052e0d6..79d6561646 100644
--- a/libavcodec/pcm_tablegen.h
+++ b/libavcodec/pcm_tablegen.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef PCM_TABLEGEN_H
-#define PCM_TABLEGEN_H
+#ifndef AVCODEC_PCM_TABLEGEN_H
+#define AVCODEC_PCM_TABLEGEN_H
 
 #include <stdint.h>
 #include "libavutil/attributes.h"
@@ -116,4 +116,4 @@ static void pcm_ulaw_tableinit(void)
 }
 #endif /* CONFIG_HARDCODED_TABLES */
 
-#endif /* PCM_TABLEGEN_H */
+#endif /* AVCODEC_PCM_TABLEGEN_H */
diff --git a/libavcodec/qdm2_tablegen.h b/libavcodec/qdm2_tablegen.h
index 769d53bb26..b2bb294f58 100644
--- a/libavcodec/qdm2_tablegen.h
+++ b/libavcodec/qdm2_tablegen.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef QDM2_TABLEGEN_H
-#define QDM2_TABLEGEN_H
+#ifndef AVCODEC_QDM2_TABLEGEN_H
+#define AVCODEC_QDM2_TABLEGEN_H
 
 #include <stdint.h>
 #include <math.h>
@@ -99,4 +99,4 @@ static av_cold void init_noise_samples(void) {
 }
 #endif /* CONFIG_HARDCODED_TABLES */
 
-#endif /* QDM2_TABLEGEN_H */
+#endif /* AVCODEC_QDM2_TABLEGEN_H */
diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h
index 7572c6c76d..8ff620fb59 100644
--- a/libavcodec/rdft.h
+++ b/libavcodec/rdft.h
@@ -71,4 +71,4 @@ void ff_rdft_end(RDFTContext *s);
 void ff_rdft_init_arm(RDFTContext *s);
 
 
-#endif
+#endif /* AVCODEC_RDFT_H */
diff --git a/libavcodec/sh4/dsputil_sh4.h b/libavcodec/sh4/dsputil_sh4.h
index 1a8b7afaed..5abe34557b 100644
--- a/libavcodec/sh4/dsputil_sh4.h
+++ b/libavcodec/sh4/dsputil_sh4.h
@@ -25,4 +25,4 @@
 void idct_sh4(DCTELEM *block);
 void dsputil_init_align(DSPContext* c, AVCodecContext *avctx);
 
-#endif
+#endif /* AVCODEC_SH4_DSPUTIL_SH4_H */
diff --git a/libavcodec/sinewin.h b/libavcodec/sinewin.h
index 2ed386a32d..eefe5bfe7f 100644
--- a/libavcodec/sinewin.h
+++ b/libavcodec/sinewin.h
@@ -56,4 +56,4 @@ extern SINETABLE(4096);
 
 extern SINETABLE_CONST float * const ff_sine_windows[13];
 
-#endif
+#endif /* AVCODEC_SINEWIN_H */
diff --git a/libavcodec/sinewin_tablegen.h b/libavcodec/sinewin_tablegen.h
index 91c26c1551..720f1ab6b8 100644
--- a/libavcodec/sinewin_tablegen.h
+++ b/libavcodec/sinewin_tablegen.h
@@ -20,6 +20,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVCODEC_SINEWIN_TABLEGEN_H
+#define AVCODEC_SINEWIN_TABLEGEN_H
+
 #include <assert.h>
 // do not use libavutil/libm.h since this is compiled both
 // for the host and the target and config.h is only valid for the target
@@ -58,3 +61,5 @@ av_cold void ff_init_ff_sine_windows(int index) {
     ff_sine_window_init(ff_sine_windows[index], 1 << index);
 #endif
 }
+
+#endif /* AVCODEC_SINEWIN_TABLEGEN_H */
diff --git a/libavcodec/sparc/dsputil_vis.h b/libavcodec/sparc/dsputil_vis.h
index b590e59361..4be86e25e0 100644
--- a/libavcodec/sparc/dsputil_vis.h
+++ b/libavcodec/sparc/dsputil_vis.h
@@ -26,4 +26,4 @@ void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data);
 void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data);
 void ff_simple_idct_vis(DCTELEM *data);
 
-#endif
+#endif /* AVCODEC_SPARC_DSPUTIL_VIS_H */
diff --git a/libavcodec/targa.h b/libavcodec/targa.h
index d7c3f451a1..f4ef5537b1 100644
--- a/libavcodec/targa.h
+++ b/libavcodec/targa.h
@@ -16,6 +16,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVCODEC_TARGA_H
+#define AVCODEC_TARGA_H
+
 /**
  * @file
  * targa file common definitions
@@ -34,3 +37,5 @@ enum TargaCompr {
     TGA_BW     = 3, // black & white or grayscale
     TGA_RLE    = 8, // flag pointing that data is RLE-coded
 };
+
+#endif /* AVCODEC_TARGA_H */
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 3a6eee52d4..5a96cd436c 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -239,4 +239,4 @@ typedef struct {
     AVFrame frames[5];
 } VP8Context;
 
-#endif
+#endif /* AVCODEC_VP8_H */
diff --git a/libavcodec/vp8data.h b/libavcodec/vp8data.h
index 6d1c070a05..4ea4581bc9 100644
--- a/libavcodec/vp8data.h
+++ b/libavcodec/vp8data.h
@@ -685,4 +685,4 @@ static const uint8_t vp8_mv_default_prob[2][19] = {
       128, 130, 130,  74, 148, 180, 203, 236, 254, 254 }
 };
 
-#endif
+#endif /* AVCODEC_VP8DATA_H */
diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h
index e6eace235d..c6379050d9 100644
--- a/libavcodec/x86/fft.h
+++ b/libavcodec/x86/fft.h
@@ -36,4 +36,4 @@ void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
 void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_dct32_float_sse(FFTSample *out, const FFTSample *in);
 
-#endif
+#endif /* AVCODEC_X86_FFT_H */
diff --git a/libavcodec/xvmc_internal.h b/libavcodec/xvmc_internal.h
index 7a4e908df9..3c6aed8361 100644
--- a/libavcodec/xvmc_internal.h
+++ b/libavcodec/xvmc_internal.h
@@ -18,8 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_INTERNAL_XVMC_H
-#define AVCODEC_INTERNAL_XVMC_H
+#ifndef AVCODEC_XVMC_INTERNAL_H
+#define AVCODEC_XVMC_INTERNAL_H
 
 #include "avcodec.h"
 #include "mpegvideo.h"
@@ -30,4 +30,4 @@ int  ff_xvmc_field_start(MpegEncContext*s, AVCodecContext *avctx);
 void ff_xvmc_field_end(MpegEncContext *s);
 void ff_xvmc_decode_mb(MpegEncContext *s);
 
-#endif /* AVCODEC_INTERNAL_XVMC_H */
+#endif /* AVCODEC_XVMC_INTERNAL_H */
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index c126cae093..33e93e27fc 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -859,4 +859,4 @@ static inline void avfilter_insert_outpad(AVFilterContext *f, unsigned index,
                         &f->output_pads, &f->outputs, p);
 }
 
-#endif  /* AVFILTER_AVFILTER_H */
+#endif /* AVFILTER_AVFILTER_H */
diff --git a/libavfilter/avfiltergraph.h b/libavfilter/avfiltergraph.h
index 801e50176f..a0f6b2e01f 100644
--- a/libavfilter/avfiltergraph.h
+++ b/libavfilter/avfiltergraph.h
@@ -120,4 +120,4 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
                          AVFilterInOut *inputs, AVFilterInOut *outputs,
                          AVClass *log_ctx);
 
-#endif  /* AVFILTER_AVFILTERGRAPH_H */
+#endif /* AVFILTER_AVFILTERGRAPH_H */
diff --git a/libavfilter/internal.h b/libavfilter/internal.h
index 0406a0d27e..64b3f3b865 100644
--- a/libavfilter/internal.h
+++ b/libavfilter/internal.h
@@ -52,4 +52,4 @@ int ff_avfilter_graph_config_formats(AVFilterGraph *graphctx, AVClass *log_ctx);
 /** default handler for freeing audio/video buffer when there are no references left */
 void ff_avfilter_default_free_buffer(AVFilterBuffer *buf);
 
-#endif  /* AVFILTER_INTERNAL_H */
+#endif /* AVFILTER_INTERNAL_H */
diff --git a/libavfilter/vsrc_buffer.h b/libavfilter/vsrc_buffer.h
index c7fc3824e0..6867f81e1c 100644
--- a/libavfilter/vsrc_buffer.h
+++ b/libavfilter/vsrc_buffer.h
@@ -19,9 +19,13 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVFILTER_VSRC_BUFFER_H
+#define AVFILTER_VSRC_BUFFER_H
+
 #include "libavcodec/avcodec.h" /* AVFrame */
 #include "avfilter.h"
 
 int av_vsrc_buffer_add_frame(AVFilterContext *buffer_filter, AVFrame *frame,
                              int64_t pts, AVRational pixel_aspect);
 
+#endif /* AVFILTER_VSRC_BUFFER_H */
diff --git a/libavformat/avio_internal.h b/libavformat/avio_internal.h
index 6630aaf61d..1369c43891 100644
--- a/libavformat/avio_internal.h
+++ b/libavformat/avio_internal.h
@@ -98,4 +98,4 @@ int ffio_open_dyn_packet_buf(AVIOContext **s, int max_packet_size);
  */
 int ffio_fdopen(AVIOContext **s, URLContext *h);
 
-#endif // AVFORMAT_AVIO_INTERNAL_H
+#endif /* AVFORMAT_AVIO_INTERNAL_H */
diff --git a/libavformat/ffmeta.h b/libavformat/ffmeta.h
index bce272a087..a5380ca13d 100644
--- a/libavformat/ffmeta.h
+++ b/libavformat/ffmeta.h
@@ -19,11 +19,11 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVFORMAT_META_H
-#define AVFORMAT_META_H
+#ifndef AVFORMAT_FFMETA_H
+#define AVFORMAT_FFMETA_H
 
 #define ID_STRING  ";FFMETADATA"
 #define ID_CHAPTER "[CHAPTER]"
 #define ID_STREAM  "[STREAM]"
 
-#endif /* AVFORMAT_META_H */
+#endif /* AVFORMAT_FFMETA_H */
diff --git a/libavformat/mms.h b/libavformat/mms.h
index 12e9ef0962..36e772c7f9 100644
--- a/libavformat/mms.h
+++ b/libavformat/mms.h
@@ -60,4 +60,5 @@ typedef struct {
 int ff_mms_asf_header_parser(MMSContext * mms);
 int ff_mms_read_data(MMSContext *mms, uint8_t *buf, const int size);
 int ff_mms_read_header(MMSContext * mms, uint8_t * buf, const int size);
-#endif
+
+#endif /* AVFORMAT_MMS_H */
diff --git a/libavformat/spdif.h b/libavformat/spdif.h
index dedb4e8832..b2a6b63be4 100644
--- a/libavformat/spdif.h
+++ b/libavformat/spdif.h
@@ -19,6 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVFORMAT_SPDIF_H
+#define AVFORMAT_SPDIF_H
+
 #include <stdint.h>
 
 #define SYNCWORD1 0xF872
@@ -55,3 +58,5 @@ static const uint16_t spdif_mpeg_pkt_offset[2][3] = {
 };
 
 void ff_spdif_bswap_buf16(uint16_t *dst, const uint16_t *src, int w);
+
+#endif /* AVFORMAT_SPDIF_H */
diff --git a/libavformat/url.h b/libavformat/url.h
index c5732c64c6..caafe07cce 100644
--- a/libavformat/url.h
+++ b/libavformat/url.h
@@ -173,4 +173,4 @@ int ffurl_register_protocol(URLProtocol *protocol, int size);
 int ff_udp_set_remote_url(URLContext *h, const char *uri);
 int ff_udp_get_local_port(URLContext *h);
 
-#endif //AVFORMAT_URL_H
+#endif /* AVFORMAT_URL_H */
diff --git a/libavformat/version.h b/libavformat/version.h
index 22b5dc9791..63f419125b 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -69,4 +69,4 @@
 #define FF_API_SDP_CREATE              (LIBAVFORMAT_VERSION_MAJOR < 54)
 #endif
 
-#endif //AVFORMAT_VERSION_H
+#endif /* AVFORMAT_VERSION_H */
diff --git a/libavutil/avassert.h b/libavutil/avassert.h
index 87333132fd..b223d26e8d 100644
--- a/libavutil/avassert.h
+++ b/libavutil/avassert.h
@@ -63,4 +63,4 @@
 #define av_assert2(cond) ((void)0)
 #endif
 
-#endif
+#endif /* AVUTIL_AVASSERT_H */
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 11ba368678..777cdc01d1 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -51,4 +51,4 @@ int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);
 int ff_get_cpu_flags_x86(void);
 
-#endif  /* AVUTIL_CPU_H */
+#endif /* AVUTIL_CPU_H */

From 95eb2e3a3819e8b6d87940bfa41bf1ea3ae68f5a Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ramiro.polla@gmail.com>
Date: Sat, 21 May 2011 14:24:50 +0200
Subject: [PATCH 174/830] DirectShow capture support Signed-off-by: Michael
 Niedermayer <michaelni@gmx.at>

---
 configure                          |   4 +
 libavdevice/Makefile               |   3 +
 libavdevice/alldevices.c           |   1 +
 libavdevice/dshow.c                | 646 +++++++++++++++++++++++++++++
 libavdevice/dshow.h                | 266 ++++++++++++
 libavdevice/dshow_common.c         | 141 +++++++
 libavdevice/dshow_enummediatypes.c | 103 +++++
 libavdevice/dshow_enumpins.c       |  99 +++++
 libavdevice/dshow_filter.c         | 196 +++++++++
 libavdevice/dshow_pin.c            | 361 ++++++++++++++++
 libavdevice/vfwcap.c               |   2 -
 11 files changed, 1820 insertions(+), 2 deletions(-)
 create mode 100644 libavdevice/dshow.c
 create mode 100644 libavdevice/dshow.h
 create mode 100644 libavdevice/dshow_common.c
 create mode 100644 libavdevice/dshow_enummediatypes.c
 create mode 100644 libavdevice/dshow_enumpins.c
 create mode 100644 libavdevice/dshow_filter.c
 create mode 100644 libavdevice/dshow_pin.c

diff --git a/configure b/configure
index c41a949b54..701d7e9ffa 100755
--- a/configure
+++ b/configure
@@ -1463,6 +1463,8 @@ w64_demuxer_deps="wav_demuxer"
 alsa_indev_deps="alsa_asoundlib_h snd_pcm_htimestamp"
 alsa_outdev_deps="alsa_asoundlib_h"
 bktr_indev_deps_any="dev_bktr_ioctl_bt848_h machine_ioctl_bt848_h dev_video_bktr_ioctl_bt848_h dev_ic_bt8xx_h"
+dshow_indev_deps="IBaseFilter"
+dshow_indev_extralibs="-lpsapi -lole32 -lstrmiids -luuid"
 dv1394_indev_deps="dv1394 dv_demuxer"
 fbdev_indev_deps="linux_fb_h"
 jack_indev_deps="jack_jack_h sem_timedwait"
@@ -2979,6 +2981,8 @@ check_func_headers "windows.h vfw.h" capCreateCaptureWindow "$vfwcap_indev_extra
 # w32api 3.12 had it defined wrong
 check_cpp_condition vfw.h "WM_CAP_DRIVER_CONNECT > WM_USER" && enable vfwcap_defines
 
+check_type "dshow.h" IBaseFilter
+
 # check for ioctl_meteor.h, ioctl_bt848.h and alternatives
 { check_header dev/bktr/ioctl_meteor.h &&
   check_header dev/bktr/ioctl_bt848.h; } ||
diff --git a/libavdevice/Makefile b/libavdevice/Makefile
index 5cfc5e8ecc..4bcb5a3ae6 100644
--- a/libavdevice/Makefile
+++ b/libavdevice/Makefile
@@ -13,6 +13,9 @@ OBJS-$(CONFIG_ALSA_INDEV)                += alsa-audio-common.o \
 OBJS-$(CONFIG_ALSA_OUTDEV)               += alsa-audio-common.o \
                                             alsa-audio-enc.o
 OBJS-$(CONFIG_BKTR_INDEV)                += bktr.o
+OBJS-$(CONFIG_DSHOW_INDEV)               += dshow.o dshow_enummediatypes.o \
+                                            dshow_enumpins.o dshow_filter.o \
+                                            dshow_pin.o dshow_common.o
 OBJS-$(CONFIG_DV1394_INDEV)              += dv1394.o
 OBJS-$(CONFIG_FBDEV_INDEV)               += fbdev.o
 OBJS-$(CONFIG_JACK_INDEV)                += jack_audio.o
diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c
index a0c9b08c6f..3997c7380b 100644
--- a/libavdevice/alldevices.c
+++ b/libavdevice/alldevices.c
@@ -41,6 +41,7 @@ void avdevice_register_all(void)
     /* devices */
     REGISTER_INOUTDEV (ALSA, alsa);
     REGISTER_INDEV    (BKTR, bktr);
+    REGISTER_INDEV    (DSHOW, dshow);
     REGISTER_INDEV    (DV1394, dv1394);
     REGISTER_INDEV    (FBDEV, fbdev);
     REGISTER_INDEV    (JACK, jack);
diff --git a/libavdevice/dshow.c b/libavdevice/dshow.c
new file mode 100644
index 0000000000..ef21af796a
--- /dev/null
+++ b/libavdevice/dshow.c
@@ -0,0 +1,646 @@
+/*
+ * DirectShow capture interface
+ * Copyright (c) 2010 Ramiro Polla
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavformat/avformat.h"
+#include "libavformat/timefilter.h"
+
+#include "dshow.h"
+
+struct dshow_ctx {
+    IGraphBuilder *graph;
+
+    char *device_name[2];           /* indexed by enum dshowDeviceType */
+
+    IBaseFilter *device_filter[2];
+    IPin        *device_pin[2];
+    libAVFilter *capture_filter[2];
+    libAVPin    *capture_pin[2];
+
+    HANDLE mutex;                   /* held while the packet queue is touched */
+    HANDLE event;                   /* set by callback() once a packet is queued */
+    AVPacketList *pktl;             /* queue of captured packets, oldest first */
+
+    unsigned int curbufsize;        /* total bytes of the packets currently queued */
+    unsigned int video_frame_num;   /* incremented on every shall_we_drop() call */
+
+    IMediaControl *control;
+
+    TimeFilter *timefilter;         /* not referenced anywhere else in this file */
+};
+
+/**
+ * Map a BITMAPINFOHEADER compression tag and bit count to a pixel format.
+ *
+ * @return the matching PIX_FMT_*, or PIX_FMT_NONE if the pair is not handled
+ */
+static enum PixelFormat dshow_pixfmt(DWORD biCompression, WORD biBitCount)
+{
+    if (biCompression == MKTAG('U', 'Y', 'V', 'Y'))
+        return PIX_FMT_UYVY422;
+    if (biCompression == MKTAG('Y', 'U', 'Y', '2'))
+        return PIX_FMT_YUYV422;
+    if (biCompression == MKTAG('I', '4', '2', '0'))
+        return PIX_FMT_YUV420P;
+    if (biCompression != BI_RGB)
+        return PIX_FMT_NONE;
+
+    /* Uncompressed RGB: the bit count picks the layout (1-8 are untested). */
+    switch (biBitCount) {
+    case 1:  return PIX_FMT_MONOWHITE;
+    case 4:  return PIX_FMT_RGB4;
+    case 8:  return PIX_FMT_RGB8;
+    case 16: return PIX_FMT_RGB555;
+    case 24: return PIX_FMT_BGR24;
+    case 32: return PIX_FMT_RGB32;
+    default: return PIX_FMT_NONE;
+    }
+}
+
+/* Map a compressed-video FOURCC to a codec id; CODEC_ID_NONE if not handled. */
+static enum CodecID dshow_codecid(DWORD biCompression)
+{
+    if (biCompression == MKTAG('d', 'v', 's', 'd'))
+        return CODEC_ID_DVVIDEO;
+
+    if (biCompression == MKTAG('M', 'J', 'P', 'G') ||
+        biCompression == MKTAG('m', 'j', 'p', 'g'))
+        return CODEC_ID_MJPEG;
+    return CODEC_ID_NONE;
+}
+
+/* Release p through rel() if it is set, then clear it so that running the
+ * cleanup a second time cannot release or free anything twice. */
+#define RELEASE_AND_CLEAR(rel, p) do { if (p) { rel(p); (p) = NULL; } } while (0)
+
+/**
+ * Stop the graph, then release every COM object, handle, device name and
+ * queued packet owned by the context.
+ * Always returns 0.
+ */
+static int
+dshow_read_close(AVFormatContext *s)
+{
+    struct dshow_ctx *ctx = s->priv_data;
+    AVPacketList *pktl;
+    int i;
+
+    if (ctx->control)
+        IMediaControl_Stop(ctx->control);
+    RELEASE_AND_CLEAR(IMediaControl_Release, ctx->control);
+    RELEASE_AND_CLEAR(IGraphBuilder_Release, ctx->graph);
+
+    /* FIXME remove filters from graph */
+    /* FIXME disconnect pins */
+    for (i = 0; i < 2; i++)
+        RELEASE_AND_CLEAR(libAVPin_Release, ctx->capture_pin[i]);
+    for (i = 0; i < 2; i++)
+        RELEASE_AND_CLEAR(libAVFilter_Release, ctx->capture_filter[i]);
+
+    for (i = 0; i < 2; i++)
+        RELEASE_AND_CLEAR(IPin_Release, ctx->device_pin[i]);
+    for (i = 0; i < 2; i++)
+        RELEASE_AND_CLEAR(IBaseFilter_Release, ctx->device_filter[i]);
+
+    for (i = 0; i < 2; i++)
+        av_freep(&ctx->device_name[i]);
+
+    RELEASE_AND_CLEAR(CloseHandle, ctx->mutex);
+    RELEASE_AND_CLEAR(CloseHandle, ctx->event);
+
+    /* detach the queue first so the context never points at freed nodes */
+    pktl = ctx->pktl;
+    ctx->pktl = NULL;
+    while (pktl) {
+        AVPacketList *next = pktl->next;
+        av_destruct_packet(&pktl->pkt);
+        av_free(pktl);
+        pktl = next;
+    }
+
+    return 0;
+}
+
+#undef RELEASE_AND_CLEAR
+
+/* Return a newly allocated UTF-8 copy of w, or NULL if allocation fails. */
+static char *dup_wchar_to_utf8(wchar_t *w)
+{
+    int len    = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
+    char *utf8 = av_malloc(len);
+    if (utf8)
+        WideCharToMultiByte(CP_UTF8, 0, w, -1, utf8, len, 0, 0);
+    return utf8;
+}
+
+/* Return 1 when the frame must be discarded because the real-time buffer
+ * is too full; the threshold cycles through dropscore[] on every call. */
+static int shall_we_drop(AVFormatContext *s)
+{
+    struct dshow_ctx *ctx = s->priv_data;
+    static const uint8_t dropscore[] = {62, 75, 87, 100};
+    /* widen before scaling: curbufsize * 100 wraps in 32 bits above ~42 MB */
+    uint64_t fullness = (uint64_t)ctx->curbufsize * 100 / s->max_picture_buffer;
+
+    if (dropscore[++ctx->video_frame_num % FF_ARRAY_ELEMS(dropscore)] > fullness)
+        return 0;
+    av_log(s, AV_LOG_ERROR,
+           "real-time buffer %"PRIu64"%% full! frame dropped!\n", fullness);
+    return 1;
+}
+
+/**
+ * Capture-filter callback: copy one captured buffer into a new packet,
+ * append it to the context's queue and signal the reader. The buffer is
+ * discarded when shall_we_drop() asks for it or when allocation fails.
+ */
+static void
+callback(void *priv_data, int index, uint8_t *buf, int buf_size, int64_t time)
+{
+    AVFormatContext *s = priv_data;
+    struct dshow_ctx *ctx = s->priv_data;
+    AVPacketList **ppktl, *pktl_next;
+
+    WaitForSingleObject(ctx->mutex, INFINITE);
+
+    /* curbufsize is shared with the reader, so consult it under the lock */
+    if (shall_we_drop(s))
+        goto fail;
+
+    pktl_next = av_mallocz(sizeof(AVPacketList));
+    if (!pktl_next)
+        goto fail;
+
+    if (av_new_packet(&pktl_next->pkt, buf_size) < 0) {
+        av_free(pktl_next);
+        goto fail;
+    }
+
+    pktl_next->pkt.stream_index = index;
+    pktl_next->pkt.pts = time;
+    memcpy(pktl_next->pkt.data, buf, buf_size);
+
+    /* walk to the tail so packets are read back in capture order */
+    for (ppktl = &ctx->pktl; *ppktl; ppktl = &(*ppktl)->next);
+    *ppktl = pktl_next;
+
+    ctx->curbufsize += buf_size;
+
+    SetEvent(ctx->event);
+fail:
+    ReleaseMutex(ctx->mutex);
+}
+
+/**
+ * Locate the capture device named ctx->device_name[devtype], add it to the
+ * graph and connect its capture pin to a freshly created libAVFilter.
+ * Objects obtained on the way are stored in ctx and are not released here.
+ * Returns 0 on success, AVERROR(EIO) otherwise.
+ */
+static int
+dshow_open_device(AVFormatContext *avctx, ICreateDevEnum *devenum,
+                  enum dshowDeviceType devtype)
+{
+    struct dshow_ctx *ctx = avctx->priv_data;
+    IBaseFilter *device_filter = NULL;
+    IEnumMoniker *classenum = NULL;
+    IGraphBuilder *graph = ctx->graph;
+    IEnumPins *pins = 0;
+    IMoniker *m = NULL;
+    IPin *device_pin = NULL;
+    libAVPin *capture_pin = NULL;
+    libAVFilter *capture_filter = NULL;
+    const char *device_name = ctx->device_name[devtype];
+    int ret = AVERROR(EIO);
+    IPin *pin;
+    int r;
+
+    /* all tables below are indexed by devtype: VideoDevice = 0, AudioDevice = 1 */
+    const GUID *device_guid[2] = { &CLSID_VideoInputDeviceCategory,
+                                   &CLSID_AudioInputDeviceCategory };
+    const GUID *mediatype[2] = { &MEDIATYPE_Video, &MEDIATYPE_Audio };
+    const char *devtypename = (devtype == VideoDevice) ? "video" : "audio";
+    const wchar_t *filter_name[2] = { L"Video capture filter", L"Audio capture filter" };
+
+    r = ICreateDevEnum_CreateClassEnumerator(devenum, device_guid[devtype],
+                                             (IEnumMoniker **) &classenum, 0);
+    if (r != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Could not enumerate %s devices.\n",
+               devtypename);
+        goto error;
+    }
+
+    /* test !device_filter first so no extra moniker is fetched after a match */
+    while (!device_filter && IEnumMoniker_Next(classenum, 1, &m, NULL) == S_OK) {
+        IPropertyBag *bag = NULL;
+        char *buf = NULL;
+        VARIANT var;
+
+        r = IMoniker_BindToStorage(m, 0, 0, &IID_IPropertyBag, (void *) &bag);
+        if (r != S_OK)
+            goto fail1;
+
+        var.vt = VT_BSTR;
+        r = IPropertyBag_Read(bag, L"FriendlyName", &var, NULL);
+        if (r != S_OK)
+            goto fail1;
+
+        buf = dup_wchar_to_utf8(var.bstrVal);
+        /* buf is NULL if the conversion could not allocate memory */
+        if (!buf || strcmp(device_name, buf))
+            goto fail1;
+        IMoniker_BindToObject(m, 0, 0, &IID_IBaseFilter, (void *) &device_filter);
+
+fail1:
+        av_free(buf);
+        if (bag)
+            IPropertyBag_Release(bag);
+        IMoniker_Release(m);
+    }
+
+    if (!device_filter) {
+        av_log(avctx, AV_LOG_ERROR, "Could not find %s device.\n",
+               devtypename);
+        goto error;
+    }
+    ctx->device_filter [devtype] = device_filter;
+
+    r = IGraphBuilder_AddFilter(graph, device_filter, NULL);
+    if (r != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Could not add device filter to graph.\n");
+        goto error;
+    }
+
+    r = IBaseFilter_EnumPins(device_filter, &pins);
+    if (r != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Could not enumerate pins.\n");
+        goto error;
+    }
+
+    while (!device_pin && IEnumPins_Next(pins, 1, &pin, NULL) == S_OK) {
+        IKsPropertySet *p = NULL;
+        IEnumMediaTypes *types = NULL; /* tested at next: on every path */
+        PIN_INFO info = {0};
+        AM_MEDIA_TYPE *type;
+        GUID category;
+        DWORD r2;
+
+        IPin_QueryPinInfo(pin, &info);
+        if (info.pFilter) /* stays NULL if the query filled nothing in */
+            IBaseFilter_Release(info.pFilter);
+        if (info.dir != PINDIR_OUTPUT)
+            goto next;
+        if (IPin_QueryInterface(pin, &IID_IKsPropertySet, (void **) &p) != S_OK)
+            goto next;
+        if (IKsPropertySet_Get(p, &AMPROPSETID_Pin, AMPROPERTY_PIN_CATEGORY,
+                               NULL, 0, &category, sizeof(GUID), &r2) != S_OK)
+            goto next;
+        if (!IsEqualGUID(&category, &PIN_CATEGORY_CAPTURE))
+            goto next;
+        if (IPin_EnumMediaTypes(pin, &types) != S_OK)
+            goto next;
+
+        IEnumMediaTypes_Reset(types);
+        while (!device_pin && IEnumMediaTypes_Next(types, 1, &type, NULL) == S_OK) {
+            if (IsEqualGUID(&type->majortype, mediatype[devtype]))
+                device_pin = pin;
+            CoTaskMemFree(type); /* freed on a match as well */
+        }
+
+next:
+        if (types)
+            IEnumMediaTypes_Release(types);
+        if (p)
+            IKsPropertySet_Release(p);
+        if (device_pin != pin)
+            IPin_Release(pin);
+    }
+
+    if (!device_pin) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Could not find output pin from %s capture device.\n", devtypename);
+        goto error;
+    }
+    ctx->device_pin[devtype] = device_pin;
+
+    capture_filter = libAVFilter_Create(avctx, callback, devtype);
+    if (!capture_filter) {
+        av_log(avctx, AV_LOG_ERROR, "Could not create grabber filter.\n");
+        goto error;
+    }
+    ctx->capture_filter[devtype] = capture_filter;
+
+    r = IGraphBuilder_AddFilter(graph, (IBaseFilter *) capture_filter,
+                                filter_name[devtype]);
+    if (r != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Could not add capture filter to graph\n");
+        goto error;
+    }
+
+    libAVPin_AddRef(capture_filter->pin);
+    capture_pin = capture_filter->pin;
+    ctx->capture_pin[devtype] = capture_pin;
+
+    r = IGraphBuilder_ConnectDirect(graph, device_pin, (IPin *) capture_pin, NULL);
+    if (r != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Could not connect pins\n");
+        goto error;
+    }
+
+    ret = 0;
+
+error:
+    if (pins)
+        IEnumPins_Release(pins);
+    if (classenum)
+        IEnumMoniker_Release(classenum);
+    return ret;
+}
+
+static enum CodecID waveform_codec_id(enum AVSampleFormat sample_fmt)
+{
+    /* only these three sample formats have a PCM codec mapping here */
+    if (sample_fmt == AV_SAMPLE_FMT_U8)
+        return CODEC_ID_PCM_U8;
+    if (sample_fmt == AV_SAMPLE_FMT_S16)
+        return CODEC_ID_PCM_S16LE;
+    return sample_fmt == AV_SAMPLE_FMT_S32 ? CODEC_ID_PCM_S32LE : CODEC_ID_NONE;
+}
+
+/* Sample format for a PCM bit depth; AV_SAMPLE_FMT_NONE if it is not 8/16/32. */
+static enum AVSampleFormat sample_fmt_bits_per_sample(int bits)
+{
+    if (bits == 8)
+        return AV_SAMPLE_FMT_U8;
+    if (bits == 16)
+        return AV_SAMPLE_FMT_S16;
+    return bits == 32 ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_NONE;
+}
+
+/* Create the stream for an opened device and fill in its codec parameters
+ * from the capture pin's connection media type.
+ * Returns 0 on success, a negative AVERROR code on failure. */
+static int
+dshow_add_device(AVFormatContext *avctx, AVFormatParameters *ap,
+                 enum dshowDeviceType devtype)
+{
+    struct dshow_ctx *ctx = avctx->priv_data;
+    AM_MEDIA_TYPE type = {0}; /* keeps pbFormat NULL if the query fills nothing */
+    AVCodecContext *codec;
+    AVStream *st;
+    int ret = AVERROR(EIO);
+
+    st = av_new_stream(avctx, devtype);
+    if (!st) {
+        ret = AVERROR(ENOMEM);
+        goto error;
+    }
+
+    ctx->capture_filter[devtype]->stream_index = st->index;
+    libAVPin_ConnectionMediaType(ctx->capture_pin[devtype], &type);
+
+    codec = st->codec;
+    if (devtype == VideoDevice) {
+        BITMAPINFOHEADER *bih = NULL;
+        if (IsEqualGUID(&type.formattype, &FORMAT_VideoInfo)) {
+            VIDEOINFOHEADER *v = (void *) type.pbFormat;
+            bih = &v->bmiHeader;
+        } else if (IsEqualGUID(&type.formattype, &FORMAT_VideoInfo2)) {
+            VIDEOINFOHEADER2 *v = (void *) type.pbFormat;
+            bih = &v->bmiHeader;
+        }
+        if (!bih) {
+            av_log(avctx, AV_LOG_ERROR, "Could not get media type.\n");
+            goto error;
+        }
+
+        codec->time_base  = ap->time_base;
+        codec->codec_type = AVMEDIA_TYPE_VIDEO;
+        codec->width      = bih->biWidth;
+        codec->height     = bih->biHeight;
+        codec->pix_fmt    = dshow_pixfmt(bih->biCompression, bih->biBitCount);
+        if (codec->pix_fmt == PIX_FMT_NONE) {
+            codec->codec_id = dshow_codecid(bih->biCompression);
+            if (codec->codec_id == CODEC_ID_NONE) {
+                av_log(avctx, AV_LOG_ERROR, "Unknown compression type. "
+                                 "Please report verbose (-v 9) debug information.\n");
+                /* dshow_read_header() closes the context on failure; not here */
+                ret = AVERROR_PATCHWELCOME;
+                goto error;
+            }
+            codec->bits_per_coded_sample = bih->biBitCount;
+        } else {
+            codec->codec_id = CODEC_ID_RAWVIDEO;
+            if (bih->biCompression == BI_RGB) {
+                codec->bits_per_coded_sample = bih->biBitCount;
+                codec->extradata = av_malloc(9 + FF_INPUT_BUFFER_PADDING_SIZE);
+                if (codec->extradata) {
+                    codec->extradata_size = 9;
+                    memcpy(codec->extradata, "BottomUp", 9);
+                }
+            }
+        }
+    } else {
+        WAVEFORMATEX *fx = NULL;
+        if (IsEqualGUID(&type.formattype, &FORMAT_WaveFormatEx))
+            fx = (void *) type.pbFormat;
+        if (!fx) {
+            av_log(avctx, AV_LOG_ERROR, "Could not get media type.\n");
+            goto error;
+        }
+
+        codec->codec_type  = AVMEDIA_TYPE_AUDIO;
+        codec->sample_fmt  = sample_fmt_bits_per_sample(fx->wBitsPerSample);
+        codec->codec_id    = waveform_codec_id(codec->sample_fmt);
+        codec->sample_rate = fx->nSamplesPerSec;
+        codec->channels    = fx->nChannels;
+    }
+
+    av_set_pts_info(st, 64, 1, 10000000);
+    ret = 0;
+
+error:
+    if (type.pbFormat)
+        CoTaskMemFree(type.pbFormat); /* IPin::ConnectionMediaType: caller frees */
+    return ret;
+}
+
+/* Split the "video=<name>" / "audio=<name>" pairs (':'-separated) found in
+ * avctx->filename into ctx->device_name[]. Returns 1 if a device was named. */
+static int parse_device_name(AVFormatContext *avctx)
+{
+    struct dshow_ctx *ctx = avctx->priv_data;
+    char **device_name = ctx->device_name;
+    char *name = av_strdup(avctx->filename);
+    char *tmp = name;
+    char *type;
+    int i;
+
+    if (!name) /* strtok(NULL, ...) below would resume some unrelated scan */
+        return 0;
+
+    while ((type = strtok(tmp, "="))) {
+        char *token = strtok(NULL, ":");
+        tmp = NULL;
+
+        if        (!strcmp(type, "video")) {
+            device_name[0] = token;
+        } else if (!strcmp(type, "audio")) {
+            device_name[1] = token;
+        } else {
+            device_name[0] = NULL;
+            device_name[1] = NULL;
+            break;
+        }
+    }
+
+    /* the tokens point into name, so take private copies before freeing it */
+    for (i = 0; i < 2; i++)
+        if (device_name[i])
+            device_name[i] = av_strdup(device_name[i]);
+    av_free(name);
+    return device_name[0] || device_name[1];
+}
+
+/* Build the capture graph for the devices named in the input string and
+ * start it. Returns 0 on success, a negative AVERROR code on failure. */
+static int dshow_read_header(AVFormatContext *avctx, AVFormatParameters *ap)
+{
+    struct dshow_ctx *ctx = avctx->priv_data;
+    IGraphBuilder *graph = NULL;
+    ICreateDevEnum *devenum = NULL;
+    IMediaControl *control = NULL;
+    int ret = AVERROR(EIO);
+    int r;
+
+    if (!parse_device_name(avctx)) {
+        av_log(avctx, AV_LOG_ERROR, "Malformed dshow input string.\n");
+        goto error;
+    }
+
+    CoInitialize(0);
+
+    r = CoCreateInstance(&CLSID_FilterGraph, NULL, CLSCTX_INPROC_SERVER,
+                         &IID_IGraphBuilder, (void **) &graph);
+    if (r != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Could not create capture graph.\n");
+        goto error;
+    }
+    ctx->graph = graph;
+
+    r = CoCreateInstance(&CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER,
+                         &IID_ICreateDevEnum, (void **) &devenum);
+    if (r != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Could not enumerate system devices.\n");
+        goto error;
+    }
+
+    /* use r here: ret must stay an error code for the failure paths below */
+    if (ctx->device_name[VideoDevice]) {
+        if ((r = dshow_open_device(avctx, devenum, VideoDevice)) < 0 ||
+            (r = dshow_add_device(avctx, ap, VideoDevice)) < 0) {
+            ret = r;
+            goto error;
+        }
+    }
+    if (ctx->device_name[AudioDevice]) {
+        if ((r = dshow_open_device(avctx, devenum, AudioDevice)) < 0 ||
+            (r = dshow_add_device(avctx, ap, AudioDevice)) < 0) {
+            ret = r;
+            goto error;
+        }
+    }
+
+    ctx->mutex = CreateMutex(NULL, 0, NULL);
+    if (!ctx->mutex) {
+        av_log(avctx, AV_LOG_ERROR, "Could not create Mutex\n");
+        goto error;
+    }
+    ctx->event = CreateEvent(NULL, 1, 0, NULL);
+    if (!ctx->event) {
+        av_log(avctx, AV_LOG_ERROR, "Could not create Event\n");
+        goto error;
+    }
+
+    r = IGraphBuilder_QueryInterface(graph, &IID_IMediaControl, (void **) &control);
+    if (r != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Could not get media control.\n");
+        goto error;
+    }
+    ctx->control = control;
+
+    r = IMediaControl_Run(control);
+    if (r == S_FALSE) {
+        OAFilterState pfs;
+        r = IMediaControl_GetState(control, 0, &pfs);
+    }
+    if (r != S_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Could not run filter\n");
+        goto error;
+    }
+
+    ret = 0;
+
+error:
+    if (ret < 0)
+        dshow_read_close(avctx);
+
+    if (devenum)
+        ICreateDevEnum_Release(devenum);
+
+    return ret;
+}
+
+/* Pop the oldest queued packet into pkt, waiting for one unless the context
+ * is non-blocking. Returns the packet size or AVERROR(EAGAIN). */
+static int dshow_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    struct dshow_ctx *ctx = s->priv_data;
+    AVPacketList *pktl = NULL;
+
+    while (!pktl) {
+        WaitForSingleObject(ctx->mutex, INFINITE);
+        pktl = ctx->pktl;
+        if (pktl) {
+            *pkt = pktl->pkt;
+            ctx->pktl = pktl->next;
+            av_free(pktl);
+            /* callback() adds to curbufsize under this mutex, so subtract here */
+            ctx->curbufsize -= pkt->size;
+        }
+        ResetEvent(ctx->event);
+        ReleaseMutex(ctx->mutex);
+        if (!pktl) {
+            if (s->flags & AVFMT_FLAG_NONBLOCK)
+                return AVERROR(EAGAIN);
+            WaitForSingleObject(ctx->event, INFINITE);
+        }
+    }
+
+    return pkt->size;
+}
+
+AVInputFormat dshow_demuxer = {
+    .name           = "dshow",
+    .long_name      = NULL_IF_CONFIG_SMALL("DirectShow capture"),
+    .priv_data_size = sizeof(struct dshow_ctx),
+    .read_probe     = NULL,
+    .read_header    = dshow_read_header,
+    .read_packet    = dshow_read_packet,
+    .read_close     = dshow_read_close,
+    .flags          = AVFMT_NOFILE,
+};
diff --git a/libavdevice/dshow.h b/libavdevice/dshow.h
new file mode 100644
index 0000000000..c991c02ecb
--- /dev/null
+++ b/libavdevice/dshow.h
@@ -0,0 +1,266 @@
+/*
+ * DirectShow capture interface
+ * Copyright (c) 2010 Ramiro Polla
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DSHOWDEBUG 0
+
+#include "libavformat/avformat.h"
+
+#define COBJMACROS
+#include <windows.h>
+#include <dshow.h>
+#include <dvdmedia.h>
+
+long ff_copy_dshow_media_type(AM_MEDIA_TYPE *dst, const AM_MEDIA_TYPE *src);
+void ff_print_AM_MEDIA_TYPE(const AM_MEDIA_TYPE *type);
+void ff_printGUID(const GUID *g);
+
+#if DSHOWDEBUG /* tracing enabled: dshowdebug() forwards to av_log() at AV_LOG_DEBUG */
+extern const AVClass *ff_dshow_context_class_ptr;
+#define dshowdebug(...) av_log(&ff_dshow_context_class_ptr, AV_LOG_DEBUG, __VA_ARGS__)
+#else /* tracing disabled: dshowdebug() expands to nothing */
+#define dshowdebug(...)
+#endif
+
+static inline void nothing(void *foo)
+{ /* intentionally empty: used as the no-op cleanup hook of DECLARE_DESTROY */
+}
+
+struct GUIDoffset { /* one row of the table scanned by DECLARE_QUERYINTERFACE */
+    const GUID *iid; /* interface id compared against the requested riid */
+    int offset;      /* byte offset added to the object pointer for that interface */
+};
+
+enum dshowDeviceType { /* kind of capture device a libAVFilter was created for */
+    VideoDevice = 0, /* pin accepts only MEDIATYPE_Video connections */
+    AudioDevice = 1, /* pin accepts only MEDIATYPE_Audio connections */
+};
+
+#define DECLARE_QUERYINTERFACE(class, ...)                                   \
+long WINAPI                                                                  \
+class##_QueryInterface(class *this, const GUID *riid, void **ppvObject)      \
+{   /* QueryInterface: the variadic part is a braced {iid, offset} list */   \
+    struct GUIDoffset ifaces[] = __VA_ARGS__;                                \
+    int i;                                                                   \
+    dshowdebug(AV_STRINGIFY(class)"_QueryInterface(%p, %p, %p)\n", this, riid, ppvObject); \
+    ff_printGUID(riid);                                                      \
+    if (!ppvObject)                                                          \
+        return E_POINTER;                                                    \
+    for (i = 0; i < sizeof(ifaces)/sizeof(ifaces[0]); i++) {                 \
+        if (IsEqualGUID(riid, ifaces[i].iid)) {                              \
+            void *obj = (void *) ((uint8_t *) this + ifaces[i].offset);      \
+            class##_AddRef(this); /* returned pointer carries a reference */ \
+            dshowdebug("\tfound %d with offset %d\n", i, ifaces[i].offset);  \
+            *ppvObject = (void *) obj;                                       \
+            return S_OK;                                                     \
+        }                                                                    \
+    }                                                                        \
+    dshowdebug("\tE_NOINTERFACE\n");                                         \
+    *ppvObject = NULL; /* no match: clear the output pointer */              \
+    return E_NOINTERFACE;                                                    \
+}
+#define DECLARE_ADDREF(class)                                                \
+unsigned long WINAPI                                                         \
+class##_AddRef(class *this)                                                  \
+{   /* AddRef: InterlockedIncrement() this->ref, return the new count */     \
+    dshowdebug(AV_STRINGIFY(class)"_AddRef(%p)\t%ld\n", this, this->ref+1);  \
+    return InterlockedIncrement(&this->ref);                                 \
+}
+#define DECLARE_RELEASE(class)                                               \
+unsigned long WINAPI                                                         \
+class##_Release(class *this)                                                 \
+{   /* Release: drop one reference and return the remaining count */         \
+    long ref = InterlockedDecrement(&this->ref);                             \
+    dshowdebug(AV_STRINGIFY(class)"_Release(%p)\t%ld\n", this, ref);         \
+    if (!ref) /* last reference gone: destroy the object */                  \
+        class##_Destroy(this);                                               \
+    return ref;                                                              \
+}
+
+#define DECLARE_DESTROY(class, func)                                         \
+void class##_Destroy(class *this)                                            \
+{   /* Destroy: run the cleanup hook `func`, then free vtable and object */  \
+    dshowdebug(AV_STRINGIFY(class)"_Destroy(%p)\n", this);                   \
+    func(this); /* invoked before the NULL check, so it may receive NULL */  \
+    if (this) {                                                              \
+        if (this->vtbl)                                                      \
+            CoTaskMemFree(this->vtbl);                                       \
+        CoTaskMemFree(this);                                                 \
+    }                                                                        \
+}
+#define DECLARE_CREATE(class, setup, ...)                                    \
+class *class##_Create(__VA_ARGS__)                                           \
+{   /* Create: zeroed object + zeroed vtable, ref = 1, then run `setup` */   \
+    class *this = CoTaskMemAlloc(sizeof(class));                             \
+    dshowdebug(AV_STRINGIFY(class)"_Create(%p)\n", this);                    \
+    if (!this)                                                               \
+        goto fail;                                                           \
+    ZeroMemory(this, sizeof(class)); /* _Destroy() must not see garbage */   \
+    this->ref  = 1;                                                          \
+    this->vtbl = CoTaskMemAlloc(sizeof(*this->vtbl));                        \
+    if (this->vtbl)                                                          \
+        ZeroMemory(this->vtbl, sizeof(*this->vtbl));                         \
+    if (!this->vtbl || !setup) /* `setup` yields 0 on failure */             \
+        goto fail;                                                           \
+    dshowdebug("created "AV_STRINGIFY(class)" %p\n", this);                  \
+    return this;                                                             \
+fail: /* `this` is either NULL or fully zero-initialised here */             \
+    class##_Destroy(this);                                                   \
+    dshowdebug("could not create "AV_STRINGIFY(class)"\n");                  \
+    return NULL;                                                             \
+}
+
+#define SETVTBL(vtbl, class, fn) /* store class_fn in vtbl->fn via a void * cast */ \
+    do { (vtbl)->fn = (void *) class##_##fn; } while(0)
+
+/*****************************************************************************
+ * Forward Declarations
+ ****************************************************************************/
+typedef struct libAVPin libAVPin;
+typedef struct libAVMemInputPin libAVMemInputPin;
+typedef struct libAVEnumPins libAVEnumPins;
+typedef struct libAVEnumMediaTypes libAVEnumMediaTypes;
+typedef struct libAVFilter libAVFilter;
+
+/*****************************************************************************
+ * libAVPin
+ ****************************************************************************/
+struct libAVPin {
+    IPinVtbl *vtbl;             /* IPin vtable pointer (first member) */
+    long ref;                   /* reference count used by AddRef/Release */
+    libAVFilter *filter;        /* filter consulted for the device type */
+    IPin *connectedto;          /* non-NULL means a connection already exists */
+    AM_MEDIA_TYPE type;
+    IMemInputPinVtbl *imemvtbl; /* QueryInterface(IID_IMemInputPin) returns this member's address */
+};
+
+long          WINAPI libAVPin_QueryInterface          (libAVPin *, const GUID *, void **);
+unsigned long WINAPI libAVPin_AddRef                  (libAVPin *);
+unsigned long WINAPI libAVPin_Release                 (libAVPin *);
+long          WINAPI libAVPin_Connect                 (libAVPin *, IPin *, const AM_MEDIA_TYPE *);
+long          WINAPI libAVPin_ReceiveConnection       (libAVPin *, IPin *, const AM_MEDIA_TYPE *);
+long          WINAPI libAVPin_Disconnect              (libAVPin *);
+long          WINAPI libAVPin_ConnectedTo             (libAVPin *, IPin **);
+long          WINAPI libAVPin_ConnectionMediaType     (libAVPin *, AM_MEDIA_TYPE *);
+long          WINAPI libAVPin_QueryPinInfo            (libAVPin *, PIN_INFO *);
+long          WINAPI libAVPin_QueryDirection          (libAVPin *, PIN_DIRECTION *);
+long          WINAPI libAVPin_QueryId                 (libAVPin *, wchar_t **);
+long          WINAPI libAVPin_QueryAccept             (libAVPin *, const AM_MEDIA_TYPE *);
+long          WINAPI libAVPin_EnumMediaTypes          (libAVPin *, IEnumMediaTypes **);
+long          WINAPI libAVPin_QueryInternalConnections(libAVPin *, IPin **, unsigned long *);
+long          WINAPI libAVPin_EndOfStream             (libAVPin *);
+long          WINAPI libAVPin_BeginFlush              (libAVPin *);
+long          WINAPI libAVPin_EndFlush                (libAVPin *);
+long          WINAPI libAVPin_NewSegment              (libAVPin *, REFERENCE_TIME, REFERENCE_TIME, double);
+
+long          WINAPI libAVMemInputPin_QueryInterface          (libAVMemInputPin *, const GUID *, void **);
+unsigned long WINAPI libAVMemInputPin_AddRef                  (libAVMemInputPin *);
+unsigned long WINAPI libAVMemInputPin_Release                 (libAVMemInputPin *);
+long          WINAPI libAVMemInputPin_GetAllocator            (libAVMemInputPin *, IMemAllocator **);
+long          WINAPI libAVMemInputPin_NotifyAllocator         (libAVMemInputPin *, IMemAllocator *, WINBOOL);
+long          WINAPI libAVMemInputPin_GetAllocatorRequirements(libAVMemInputPin *, ALLOCATOR_PROPERTIES *);
+long          WINAPI libAVMemInputPin_Receive                 (libAVMemInputPin *, IMediaSample *);
+long          WINAPI libAVMemInputPin_ReceiveMultiple         (libAVMemInputPin *, IMediaSample **, long, long *);
+long          WINAPI libAVMemInputPin_ReceiveCanBlock         (libAVMemInputPin *);
+
+void                 libAVPin_Destroy(libAVPin *);
+libAVPin            *libAVPin_Create (libAVFilter *filter);
+
+void                 libAVMemInputPin_Destroy(libAVMemInputPin *);
+
+/*****************************************************************************
+ * libAVEnumPins
+ ****************************************************************************/
+struct libAVEnumPins {
+    IEnumPinsVtbl *vtbl;
+    long ref;            /* reference count used by AddRef/Release */
+    int pos;             /* 0 until Next() has handed out the pin, then 1 */
+    libAVPin *pin;       /* the only pin this enumerator yields */
+    libAVFilter *filter; /* AddRef'd in libAVEnumPins_Setup() */
+};
+
+long          WINAPI libAVEnumPins_QueryInterface(libAVEnumPins *, const GUID *, void **);
+unsigned long WINAPI libAVEnumPins_AddRef        (libAVEnumPins *);
+unsigned long WINAPI libAVEnumPins_Release       (libAVEnumPins *);
+long          WINAPI libAVEnumPins_Next          (libAVEnumPins *, unsigned long, IPin **, unsigned long *);
+long          WINAPI libAVEnumPins_Skip          (libAVEnumPins *, unsigned long);
+long          WINAPI libAVEnumPins_Reset         (libAVEnumPins *);
+long          WINAPI libAVEnumPins_Clone         (libAVEnumPins *, libAVEnumPins **);
+
+void                 libAVEnumPins_Destroy(libAVEnumPins *);
+libAVEnumPins       *libAVEnumPins_Create (libAVPin *pin, libAVFilter *filter);
+
+/*****************************************************************************
+ * libAVEnumMediaTypes
+ ****************************************************************************/
+struct libAVEnumMediaTypes {
+    IEnumPinsVtbl *vtbl; /* NOTE(review): IEnumMediaTypesVtbl looks intended; same seven slots are filled - confirm */
+    long ref;            /* reference count used by AddRef/Release */
+    int pos;             /* 0 until Next() has consumed the single entry, then 1 */
+    AM_MEDIA_TYPE type;  /* the only type offered; majortype GUID_NULL means none */
+};
+
+long          WINAPI libAVEnumMediaTypes_QueryInterface(libAVEnumMediaTypes *, const GUID *, void **);
+unsigned long WINAPI libAVEnumMediaTypes_AddRef        (libAVEnumMediaTypes *);
+unsigned long WINAPI libAVEnumMediaTypes_Release       (libAVEnumMediaTypes *);
+long          WINAPI libAVEnumMediaTypes_Next          (libAVEnumMediaTypes *, unsigned long, AM_MEDIA_TYPE **, unsigned long *);
+long          WINAPI libAVEnumMediaTypes_Skip          (libAVEnumMediaTypes *, unsigned long);
+long          WINAPI libAVEnumMediaTypes_Reset         (libAVEnumMediaTypes *);
+long          WINAPI libAVEnumMediaTypes_Clone         (libAVEnumMediaTypes *, libAVEnumMediaTypes **);
+
+void                 libAVEnumMediaTypes_Destroy(libAVEnumMediaTypes *);
+libAVEnumMediaTypes *libAVEnumMediaTypes_Create(const AM_MEDIA_TYPE *type);
+
+/*****************************************************************************
+ * libAVFilter
+ ****************************************************************************/
+struct libAVFilter {
+    IBaseFilterVtbl *vtbl;
+    long ref;                   /* reference count used by AddRef/Release */
+    const wchar_t *name;
+    libAVPin *pin;              /* single pin, created in libAVFilter_Setup() */
+    FILTER_INFO info;           /* graph pointer and name stored by JoinFilterGraph() */
+    FILTER_STATE state;         /* last state set by Stop/Pause/Run */
+    IReferenceClock *clock;     /* set by SetSyncSource(), which takes a reference */
+    enum dshowDeviceType type;  /* stored by libAVFilter_Setup() */
+    void *priv_data;            /* opaque pointer stored by libAVFilter_Setup() */
+    int stream_index;
+    int64_t start_time;         /* start time passed to Run() */
+    void (*callback)(void *priv_data, int index, uint8_t *buf, int buf_size, int64_t time);
+};
+
+long          WINAPI libAVFilter_QueryInterface (libAVFilter *, const GUID *, void **);
+unsigned long WINAPI libAVFilter_AddRef         (libAVFilter *);
+unsigned long WINAPI libAVFilter_Release        (libAVFilter *);
+long          WINAPI libAVFilter_GetClassID     (libAVFilter *, CLSID *);
+long          WINAPI libAVFilter_Stop           (libAVFilter *);
+long          WINAPI libAVFilter_Pause          (libAVFilter *);
+long          WINAPI libAVFilter_Run            (libAVFilter *, REFERENCE_TIME);
+long          WINAPI libAVFilter_GetState       (libAVFilter *, DWORD, FILTER_STATE *);
+long          WINAPI libAVFilter_SetSyncSource  (libAVFilter *, IReferenceClock *);
+long          WINAPI libAVFilter_GetSyncSource  (libAVFilter *, IReferenceClock **);
+long          WINAPI libAVFilter_EnumPins       (libAVFilter *, IEnumPins **);
+long          WINAPI libAVFilter_FindPin        (libAVFilter *, const wchar_t *, IPin **);
+long          WINAPI libAVFilter_QueryFilterInfo(libAVFilter *, FILTER_INFO *);
+long          WINAPI libAVFilter_JoinFilterGraph(libAVFilter *, IFilterGraph *, const wchar_t *);
+long          WINAPI libAVFilter_QueryVendorInfo(libAVFilter *, wchar_t **);
+
+void                 libAVFilter_Destroy(libAVFilter *);
+libAVFilter         *libAVFilter_Create (void *, void *, enum dshowDeviceType);
diff --git a/libavdevice/dshow_common.c b/libavdevice/dshow_common.c
new file mode 100644
index 0000000000..c813dc165e
--- /dev/null
+++ b/libavdevice/dshow_common.c
@@ -0,0 +1,141 @@
+/*
+ * Directshow capture interface
+ * Copyright (c) 2010 Ramiro Polla
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dshow.h"
+
+/* Deep-copy *src into *dst: the format block is duplicated with
+ * CoTaskMemAlloc() and pUnk is cleared.  *dst is left untouched on failure. */
+long ff_copy_dshow_media_type(AM_MEDIA_TYPE *dst, const AM_MEDIA_TYPE *src)
+{
+    uint8_t *format_copy = NULL;
+
+    if (src->cbFormat != 0) {
+        format_copy = CoTaskMemAlloc(src->cbFormat);
+        if (format_copy == NULL)
+            return E_OUTOFMEMORY;
+        memcpy(format_copy, src->pbFormat, src->cbFormat);
+    }
+    *dst          = *src;
+    dst->pbFormat = format_copy;
+    dst->pUnk     = NULL;
+    return S_OK;
+}
+
+/* Trace the fields of *g via dshowdebug(); empty unless DSHOWDEBUG is set. */
+void ff_printGUID(const GUID *g)
+{
+#if DSHOWDEBUG
+    const uint32_t *data1 = (const uint32_t *) &g->Data1;
+    const uint16_t *data2 = (const uint16_t *) &g->Data2;
+    const uint8_t  *data4 = (const uint8_t  *) &g->Data4;
+    dshowdebug("0x%08x 0x%04x 0x%04x %02x%02x%02x%02x%02x%02x%02x%02x",
+               data1[0], data2[0], data2[1], data4[0], data4[1], data4[2],
+               data4[3], data4[4], data4[5], data4[6], data4[7]);
+#endif
+}
+
+static const char *dshow_context_to_name(void *ptr)
+{
+    return "dshow"; /* constant name; ptr is not inspected */
+}
+static const AVClass ff_dshow_context_class = { "DirectShow", dshow_context_to_name };
+const AVClass *ff_dshow_context_class_ptr = &ff_dshow_context_class; /* its address is the av_log() context used by dshowdebug() */
+
+#define dstruct(pctx, sname, var, type) /* trace sname->var using printf conversion `type`; pctx is unused */ \
+    dshowdebug("      "#var":\t%"type"\n", sname->var)
+
+#if DSHOWDEBUG
+/* Trace every BITMAPINFOHEADER field; the first argument is unused. */
+static void dump_bih(void *s, BITMAPINFOHEADER *bih)
+{
+    dshowdebug("      BITMAPINFOHEADER\n");
+    dstruct(s, bih, biSize, "lu");
+    dstruct(s, bih, biWidth, "ld");
+    dstruct(s, bih, biHeight, "ld");
+    dstruct(s, bih, biPlanes, "d");
+    dstruct(s, bih, biBitCount, "d");
+    dstruct(s, bih, biCompression, "lu");
+    dshowdebug("      biCompression:\t\"%.4s\"\n", (char*) &bih->biCompression);
+    dstruct(s, bih, biSizeImage, "lu");
+    dstruct(s, bih, biXPelsPerMeter, "ld"); /* LONG (signed), like biWidth */
+    dstruct(s, bih, biYPelsPerMeter, "ld"); /* LONG (signed), like biHeight */
+    dstruct(s, bih, biClrUsed, "lu");
+    dstruct(s, bih, biClrImportant, "lu");
+}
+#endif
+
+void ff_print_AM_MEDIA_TYPE(const AM_MEDIA_TYPE *type)
+{ /* Trace all fields of *type through dshowdebug(); the body is empty unless DSHOWDEBUG is set. */
+#if DSHOWDEBUG
+    dshowdebug("    majortype\t");
+    ff_printGUID(&type->majortype);
+    dshowdebug("\n");
+    dshowdebug("    subtype\t");
+    ff_printGUID(&type->subtype);
+    dshowdebug("\n");
+    dshowdebug("    bFixedSizeSamples\t%d\n", type->bFixedSizeSamples);
+    dshowdebug("    bTemporalCompression\t%d\n", type->bTemporalCompression);
+    dshowdebug("    lSampleSize\t%lu\n", type->lSampleSize);
+    dshowdebug("    formattype\t");
+    ff_printGUID(&type->formattype);
+    dshowdebug("\n");
+    dshowdebug("    pUnk\t%p\n", type->pUnk);
+    dshowdebug("    cbFormat\t%lu\n", type->cbFormat);
+    dshowdebug("    pbFormat\t%p\n", type->pbFormat);
+
+    if (IsEqualGUID(&type->formattype, &FORMAT_VideoInfo)) { /* pbFormat is read as VIDEOINFOHEADER */
+        VIDEOINFOHEADER *v = (void *) type->pbFormat;
+        dshowdebug("      rcSource: left %ld top %ld right %ld bottom %ld\n",
+                   v->rcSource.left, v->rcSource.top, v->rcSource.right, v->rcSource.bottom);
+        dshowdebug("      rcTarget: left %ld top %ld right %ld bottom %ld\n",
+                   v->rcTarget.left, v->rcTarget.top, v->rcTarget.right, v->rcTarget.bottom);
+        dshowdebug("      dwBitRate: %lu\n", v->dwBitRate);
+        dshowdebug("      dwBitErrorRate: %lu\n", v->dwBitErrorRate);
+        dshowdebug("      AvgTimePerFrame: %"PRId64"\n", v->AvgTimePerFrame);
+        dump_bih(NULL, &v->bmiHeader);
+    } else if (IsEqualGUID(&type->formattype, &FORMAT_VideoInfo2)) { /* pbFormat is read as VIDEOINFOHEADER2 */
+        VIDEOINFOHEADER2 *v = (void *) type->pbFormat;
+        dshowdebug("      rcSource: left %ld top %ld right %ld bottom %ld\n",
+                   v->rcSource.left, v->rcSource.top, v->rcSource.right, v->rcSource.bottom);
+        dshowdebug("      rcTarget: left %ld top %ld right %ld bottom %ld\n",
+                   v->rcTarget.left, v->rcTarget.top, v->rcTarget.right, v->rcTarget.bottom);
+        dshowdebug("      dwBitRate: %lu\n", v->dwBitRate);
+        dshowdebug("      dwBitErrorRate: %lu\n", v->dwBitErrorRate);
+        dshowdebug("      AvgTimePerFrame: %"PRId64"\n", v->AvgTimePerFrame);
+        dshowdebug("      dwInterlaceFlags: %lu\n", v->dwInterlaceFlags);
+        dshowdebug("      dwCopyProtectFlags: %lu\n", v->dwCopyProtectFlags);
+        dshowdebug("      dwPictAspectRatioX: %lu\n", v->dwPictAspectRatioX);
+        dshowdebug("      dwPictAspectRatioY: %lu\n", v->dwPictAspectRatioY);
+//        dshowdebug("      dwReserved1: %lu\n", v->u.dwReserved1); /* mingw-w64 is buggy and doesn't name unnamed unions */
+        dshowdebug("      dwReserved2: %lu\n", v->dwReserved2);
+        dump_bih(NULL, &v->bmiHeader);
+    } else if (IsEqualGUID(&type->formattype, &FORMAT_WaveFormatEx)) { /* pbFormat is read as WAVEFORMATEX */
+        WAVEFORMATEX *fx = (void *) type->pbFormat;
+        dshowdebug("      wFormatTag: %u\n", fx->wFormatTag);
+        dshowdebug("      nChannels: %u\n", fx->nChannels);
+        dshowdebug("      nSamplesPerSec: %lu\n", fx->nSamplesPerSec);
+        dshowdebug("      nAvgBytesPerSec: %lu\n", fx->nAvgBytesPerSec);
+        dshowdebug("      nBlockAlign: %u\n", fx->nBlockAlign);
+        dshowdebug("      wBitsPerSample: %u\n", fx->wBitsPerSample);
+        dshowdebug("      cbSize: %u\n", fx->cbSize);
+    } /* any other formattype: only the generic fields above are traced */
+#endif
+}
diff --git a/libavdevice/dshow_enummediatypes.c b/libavdevice/dshow_enummediatypes.c
new file mode 100644
index 0000000000..a700133ba6
--- /dev/null
+++ b/libavdevice/dshow_enummediatypes.c
@@ -0,0 +1,103 @@
+/*
+ * DirectShow capture interface
+ * Copyright (c) 2010 Ramiro Polla
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dshow.h"
+
+/* IUnknown plumbing; this enumerator must answer to IID_IEnumMediaTypes. */
+DECLARE_QUERYINTERFACE(libAVEnumMediaTypes, { {&IID_IUnknown,0}, {&IID_IEnumMediaTypes,0} })
+DECLARE_ADDREF(libAVEnumMediaTypes)
+DECLARE_RELEASE(libAVEnumMediaTypes)
+
+long WINAPI libAVEnumMediaTypes_Next(libAVEnumMediaTypes *this, unsigned long n,
+                         AM_MEDIA_TYPE **types, unsigned long *fetched)
+{   /* Next entry of the vtable for a list holding at most one media type */
+    int count = 0;
+    dshowdebug("libAVEnumMediaTypes_Next(%p)\n", this);
+    if (!types)
+        return E_POINTER;
+    if (!this->pos && n == 1) {
+        if (!IsEqualGUID(&this->type.majortype, &GUID_NULL)) {
+            AM_MEDIA_TYPE *type = CoTaskMemAlloc(sizeof(*type)); /* COM callers CoTaskMemFree() it */
+            if (!type || ff_copy_dshow_media_type(type, &this->type) != S_OK) {
+                CoTaskMemFree(type); /* accepts NULL */
+                return E_OUTOFMEMORY;
+            }
+            *types = type;
+            count = 1;
+        }
+        this->pos = 1;
+    }
+    if (fetched)
+        *fetched = count;
+    return count ? S_OK : S_FALSE;
+}
+/* Skip entry of the enumerator vtable.  The stored position is left untouched;
+ * only the status code depends on n. */
+long WINAPI libAVEnumMediaTypes_Skip(libAVEnumMediaTypes *this, unsigned long n)
+{
+    dshowdebug("libAVEnumMediaTypes_Skip(%p)\n", this);
+    /* Any skip will always fall outside of the only valid type. */
+    return n ? S_FALSE : S_OK;
+}
+/* Reset entry of the enumerator vtable: rewind to the first position. */
+long WINAPI libAVEnumMediaTypes_Reset(libAVEnumMediaTypes *this)
+{
+    dshowdebug("libAVEnumMediaTypes_Reset(%p)\n", this);
+    this->pos = 0;
+    return S_OK;
+}
+/* Clone entry: new enumerator over the same media type, at the same position. */
+long WINAPI libAVEnumMediaTypes_Clone(libAVEnumMediaTypes *this,
+                                      libAVEnumMediaTypes **enums)
+{
+    libAVEnumMediaTypes *copy;
+    dshowdebug("libAVEnumMediaTypes_Clone(%p)\n", this);
+    if (!enums)
+        return E_POINTER;
+    if (!(copy = libAVEnumMediaTypes_Create(&this->type)))
+        return E_OUTOFMEMORY;
+    copy->pos = this->pos;
+    *enums = copy;
+    return S_OK;
+}
+
+/* Fill in the vtable and keep a private copy of *type (NULL: no type to offer).
+ * Returns 1 on success and 0 when the copy fails, which makes _Create() fail. */
+static int
+libAVEnumMediaTypes_Setup(libAVEnumMediaTypes *this, const AM_MEDIA_TYPE *type)
+{
+    IEnumPinsVtbl *vtbl = this->vtbl;
+    SETVTBL(vtbl, libAVEnumMediaTypes, QueryInterface);
+    SETVTBL(vtbl, libAVEnumMediaTypes, AddRef);
+    SETVTBL(vtbl, libAVEnumMediaTypes, Release);
+    SETVTBL(vtbl, libAVEnumMediaTypes, Next);
+    SETVTBL(vtbl, libAVEnumMediaTypes, Skip);
+    SETVTBL(vtbl, libAVEnumMediaTypes, Reset);
+    SETVTBL(vtbl, libAVEnumMediaTypes, Clone);
+
+    if (!type) {
+        this->type.majortype = GUID_NULL;
+        return 1;
+    }
+    return ff_copy_dshow_media_type(&this->type, type) == S_OK;
+}
+DECLARE_CREATE(libAVEnumMediaTypes, libAVEnumMediaTypes_Setup(this, type), const AM_MEDIA_TYPE *type)
+DECLARE_DESTROY(libAVEnumMediaTypes, nothing) /* NOTE(review): the pbFormat copy made in _Setup() is not freed by this hook */
diff --git a/libavdevice/dshow_enumpins.c b/libavdevice/dshow_enumpins.c
new file mode 100644
index 0000000000..97890fb88e
--- /dev/null
+++ b/libavdevice/dshow_enumpins.c
@@ -0,0 +1,99 @@
+/*
+ * DirectShow capture interface
+ * Copyright (c) 2010 Ramiro Polla
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dshow.h"
+
+DECLARE_QUERYINTERFACE(libAVEnumPins, /* IUnknown plumbing for the pin enumerator */
+    { {&IID_IUnknown,0}, {&IID_IEnumPins,0} })
+DECLARE_ADDREF(libAVEnumPins)
+DECLARE_RELEASE(libAVEnumPins)
+
+/* Next entry of the IEnumPins vtable for an enumerator holding one pin: a
+ * request for exactly one pin at position 0 yields that pin with an added
+ * reference; any other request yields nothing and S_FALSE. */
+long WINAPI libAVEnumPins_Next(libAVEnumPins *this, unsigned long n, IPin **pins,
+                   unsigned long *fetched)
+{
+    int delivered = 0;
+    dshowdebug("libAVEnumPins_Next(%p)\n", this);
+    if (!pins)
+        return E_POINTER;
+    if (n == 1 && this->pos == 0) {
+        libAVPin_AddRef(this->pin);
+        *pins = (IPin *) this->pin;
+        this->pos = 1;
+        delivered = 1;
+    }
+    if (fetched)
+        *fetched = delivered;
+    return delivered ? S_OK : S_FALSE;
+}
+/* Skip entry of the IEnumPins vtable.  The stored position is left untouched;
+ * only the status code depends on n. */
+long WINAPI libAVEnumPins_Skip(libAVEnumPins *this, unsigned long n)
+{
+    dshowdebug("libAVEnumPins_Skip(%p)\n", this);
+    /* Any skip will always fall outside of the only valid pin. */
+    return n ? S_FALSE : S_OK;
+}
+/* Reset entry of the IEnumPins vtable: rewind to the first position. */
+long WINAPI libAVEnumPins_Reset(libAVEnumPins *this)
+{
+    dshowdebug("libAVEnumPins_Reset(%p)\n", this);
+    this->pos = 0;
+    return S_OK;
+}
+/* Clone entry: new enumerator over the same pin/filter, at the same position. */
+long WINAPI libAVEnumPins_Clone(libAVEnumPins *this, libAVEnumPins **pins)
+{
+    libAVEnumPins *copy;
+    dshowdebug("libAVEnumPins_Clone(%p)\n", this);
+    if (!pins)
+        return E_POINTER;
+    copy = libAVEnumPins_Create(this->pin, this->filter);
+    if (!copy)
+        return E_OUTOFMEMORY;
+    copy->pos = this->pos;
+    *pins = copy;
+    return S_OK;
+}
+
+static int libAVEnumPins_Setup(libAVEnumPins *this, libAVPin *pin, libAVFilter *filter)
+{   /* fill in the vtable, remember pin/filter and take a reference on the filter */
+    SETVTBL(this->vtbl, libAVEnumPins, QueryInterface);
+    SETVTBL(this->vtbl, libAVEnumPins, AddRef);
+    SETVTBL(this->vtbl, libAVEnumPins, Release);
+    SETVTBL(this->vtbl, libAVEnumPins, Next);
+    SETVTBL(this->vtbl, libAVEnumPins, Skip);
+    SETVTBL(this->vtbl, libAVEnumPins, Reset);
+    SETVTBL(this->vtbl, libAVEnumPins, Clone);
+    this->pin = pin;
+    this->filter = filter;
+    libAVFilter_AddRef(this->filter);
+    return 1;
+}
+static void libAVEnumPins_Cleanup(libAVEnumPins *this)
+{   /* destroy hook: release the filter reference taken above; `this` may be NULL */
+    if (this && this->filter)
+        libAVFilter_Release(this->filter);
+}
+DECLARE_CREATE(libAVEnumPins, libAVEnumPins_Setup(this, pin, filter), libAVPin *pin, libAVFilter *filter)
+DECLARE_DESTROY(libAVEnumPins, libAVEnumPins_Cleanup)
diff --git a/libavdevice/dshow_filter.c b/libavdevice/dshow_filter.c
new file mode 100644
index 0000000000..e5a3be854b
--- /dev/null
+++ b/libavdevice/dshow_filter.c
@@ -0,0 +1,196 @@
+/*
+ * DirectShow capture interface
+ * Copyright (c) 2010 Ramiro Polla
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dshow.h"
+
+DECLARE_QUERYINTERFACE(libAVFilter, /* IUnknown plumbing for the filter object */
+    { {&IID_IUnknown,0}, {&IID_IBaseFilter,0} })
+DECLARE_ADDREF(libAVFilter)
+DECLARE_RELEASE(libAVFilter)
+
+/* GetClassID entry of the IBaseFilter vtable: deliberately unsupported. */
+long WINAPI libAVFilter_GetClassID(libAVFilter *this, CLSID *id)
+{
+    dshowdebug("libAVFilter_GetClassID(%p)\n", this);
+    /* I'm not creating a ClassID just for this. */
+    return E_FAIL;
+}
+/* Stop entry of the IBaseFilter vtable: only records the new state. */
+long WINAPI libAVFilter_Stop(libAVFilter *this)
+{
+    dshowdebug("libAVFilter_Stop(%p)\n", this);
+    this->state = State_Stopped;
+    return S_OK;
+}
+/* Pause entry of the IBaseFilter vtable: only records the new state. */
+long WINAPI libAVFilter_Pause(libAVFilter *this)
+{
+    dshowdebug("libAVFilter_Pause(%p)\n", this);
+    this->state = State_Paused;
+    return S_OK;
+}
+/* Run entry of the IBaseFilter vtable: mark running, remember the start time. */
+long WINAPI libAVFilter_Run(libAVFilter *this, REFERENCE_TIME start)
+{
+    dshowdebug("libAVFilter_Run(%p) %"PRId64"\n", this, start);
+    this->start_time = start;
+    this->state = State_Running;
+    return S_OK;
+}
+/* GetState entry: report the stored state; the timeout `ms` is not used. */
+long WINAPI libAVFilter_GetState(libAVFilter *this, DWORD ms, FILTER_STATE *state)
+{
+    dshowdebug("libAVFilter_GetState(%p)\n", this);
+    if (!state)
+        return E_POINTER;
+    *state = this->state;
+    return S_OK;
+}
+/* SetSyncSource entry: swap the stored reference clock, releasing the old one
+ * and taking a reference on the new one; a NULL clock just clears it. */
+long WINAPI libAVFilter_SetSyncSource(libAVFilter *this, IReferenceClock *clock)
+{
+    dshowdebug("libAVFilter_SetSyncSource(%p)\n", this);
+
+    if (this->clock == clock)
+        return S_OK;    /* same clock: reference counts stay as they are */
+    if (this->clock)
+        IReferenceClock_Release(this->clock);
+    this->clock = clock;
+    if (clock)
+        IReferenceClock_AddRef(clock);
+    return S_OK;
+}
+/* GetSyncSource entry: hand back the stored clock (may be NULL), AddRef'd. */
+long WINAPI libAVFilter_GetSyncSource(libAVFilter *this, IReferenceClock **clock)
+{
+    IReferenceClock *current;
+    dshowdebug("libAVFilter_GetSyncSource(%p)\n", this);
+    if (!clock)
+        return E_POINTER;
+    current = this->clock;
+    if (current)
+        IReferenceClock_AddRef(current);
+    *clock = current;
+    return S_OK;
+}
+/* EnumPins entry: create a fresh enumerator over this filter's single pin. */
+long WINAPI libAVFilter_EnumPins(libAVFilter *this, IEnumPins **enumpin)
+{
+    libAVEnumPins *enumerator;
+    dshowdebug("libAVFilter_EnumPins(%p)\n", this);
+
+    if (!enumpin)
+        return E_POINTER;
+    enumerator = libAVEnumPins_Create(this->pin, this);
+    if (!enumerator)
+        return E_OUTOFMEMORY;
+
+    *enumpin = (IEnumPins *) enumerator;
+    return S_OK;
+}
+/* FindPin entry: the only pin answers to the id L"In".  A match is returned
+ * with an added reference; any other id stores NULL in *pin and yields
+ * VFW_E_NOT_FOUND. */
+long WINAPI libAVFilter_FindPin(libAVFilter *this, const wchar_t *id, IPin **pin)
+{
+    dshowdebug("libAVFilter_FindPin(%p)\n", this);
+
+    if (!id || !pin)
+        return E_POINTER;
+    if (wcscmp(id, L"In") != 0) {
+        *pin = NULL;
+        return VFW_E_NOT_FOUND;
+    }
+
+    libAVPin_AddRef(this->pin);
+    *pin = (IPin *) this->pin;
+    return S_OK;
+}
+/* QueryFilterInfo entry: copy out the stored FILTER_INFO, adding a reference
+ * on the graph pointer (if any) on behalf of the caller. */
+long WINAPI libAVFilter_QueryFilterInfo(libAVFilter *this, FILTER_INFO *info)
+{
+    dshowdebug("libAVFilter_QueryFilterInfo(%p)\n", this);
+
+    if (!info)
+        return E_POINTER;
+    *info = this->info;
+    if (info->pGraph)
+        IFilterGraph_AddRef(info->pGraph);
+    return S_OK;
+}
+long WINAPI libAVFilter_JoinFilterGraph(libAVFilter *this, IFilterGraph *graph,
+                            const wchar_t *name)
+{   /* JoinFilterGraph entry: store the graph pointer (no AddRef) and its name */
+    const size_t last = sizeof(this->info.achName) / sizeof(*this->info.achName) - 1;
+    dshowdebug("libAVFilter_JoinFilterGraph(%p)\n", this);
+    this->info.pGraph = graph;
+    if (name) { /* bounded copy: wcscpy() could overrun the fixed-size achName */
+        wcsncpy(this->info.achName, name, last);
+        this->info.achName[last] = L'\0'; /* wcsncpy() does not terminate on truncation */
+    }
+    return S_OK;
+}
+/* QueryVendorInfo entry: return a CoTaskMemAlloc'd copy of L"libAV", which the
+ * COM caller frees with CoTaskMemFree(). */
+long WINAPI libAVFilter_QueryVendorInfo(libAVFilter *this, wchar_t **info)
+{
+    dshowdebug("libAVFilter_QueryVendorInfo(%p)\n", this);
+    if (!info)
+        return E_POINTER;
+    if (!(*info = CoTaskMemAlloc(sizeof(L"libAV"))))
+        return E_OUTOFMEMORY;
+    memcpy(*info, L"libAV", sizeof(L"libAV")); /* size includes the terminator */
+    return S_OK;
+}
+
+/* Fill in the IBaseFilter vtable, create the filter's single pin and store the
+ * caller's context.  Returns 1 on success, 0 if the pin could not be created. */
+static int
+libAVFilter_Setup(libAVFilter *this, void *priv_data, void *callback,
+                  enum dshowDeviceType type)
+{
+    IBaseFilterVtbl *vtbl = this->vtbl;
+    SETVTBL(vtbl, libAVFilter, QueryInterface);
+    SETVTBL(vtbl, libAVFilter, AddRef);
+    SETVTBL(vtbl, libAVFilter, Release);
+    SETVTBL(vtbl, libAVFilter, GetClassID);
+    SETVTBL(vtbl, libAVFilter, Stop);
+    SETVTBL(vtbl, libAVFilter, Pause);
+    SETVTBL(vtbl, libAVFilter, Run);
+    SETVTBL(vtbl, libAVFilter, GetState);
+    SETVTBL(vtbl, libAVFilter, SetSyncSource);
+    SETVTBL(vtbl, libAVFilter, GetSyncSource);
+    SETVTBL(vtbl, libAVFilter, EnumPins);
+    SETVTBL(vtbl, libAVFilter, FindPin);
+    SETVTBL(vtbl, libAVFilter, QueryFilterInfo);
+    SETVTBL(vtbl, libAVFilter, JoinFilterGraph);
+    SETVTBL(vtbl, libAVFilter, QueryVendorInfo);
+    this->pin = libAVPin_Create(this);
+    this->priv_data = priv_data;
+    this->callback  = callback;
+    this->type      = type;
+    /* EnumPins/FindPin AddRef the pin unconditionally, so a NULL pin is fatal. */
+    return this->pin != NULL;
+}
+DECLARE_CREATE(libAVFilter, libAVFilter_Setup(this, priv_data, callback, type),
+               void *priv_data, void *callback, enum dshowDeviceType type)
+DECLARE_DESTROY(libAVFilter, nothing) /* NOTE(review): this hook releases neither this->pin nor this->clock */
diff --git a/libavdevice/dshow_pin.c b/libavdevice/dshow_pin.c
new file mode 100644
index 0000000000..f31ecc6c99
--- /dev/null
+++ b/libavdevice/dshow_pin.c
@@ -0,0 +1,361 @@
+/*
+ * DirectShow capture interface
+ * Copyright (c) 2010 Ramiro Polla
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dshow.h"
+
+#include <stddef.h>
+#define imemoffset offsetof(libAVPin, imemvtbl)
+
+DECLARE_QUERYINTERFACE(libAVPin,
+    { {&IID_IUnknown,0}, {&IID_IPin,0}, {&IID_IMemInputPin,imemoffset} })
+DECLARE_ADDREF(libAVPin)
+DECLARE_RELEASE(libAVPin)
+
+long WINAPI
+libAVPin_Connect(libAVPin *this, IPin *pin, const AM_MEDIA_TYPE *type)
+{
+    dshowdebug("libAVPin_Connect(%p, %p, %p)\n", this, pin, type);
+    /* Input pins only receive connections (see libAVPin_ReceiveConnection). */
+    return S_FALSE;
+}
+long WINAPI
+libAVPin_ReceiveConnection(libAVPin *this, IPin *pin,
+                           const AM_MEDIA_TYPE *type)
+{
+    enum dshowDeviceType devtype = this->filter->type;
+    dshowdebug("libAVPin_ReceiveConnection(%p)\n", this);
+
+    if (!pin || !type) /* type is dereferenced below, so check it too */
+        return E_POINTER;
+    if (this->connectedto)
+        return VFW_E_ALREADY_CONNECTED;
+
+    ff_print_AM_MEDIA_TYPE(type);
+    if (devtype == VideoDevice) {
+        if (!IsEqualGUID(&type->majortype, &MEDIATYPE_Video))
+            return VFW_E_TYPE_NOT_ACCEPTED;
+    } else {
+        if (!IsEqualGUID(&type->majortype, &MEDIATYPE_Audio))
+            return VFW_E_TYPE_NOT_ACCEPTED;
+    }
+    /* Accepted: keep a reference to the peer and remember the agreed type. */
+    IPin_AddRef(pin);
+    this->connectedto = pin;
+
+    ff_copy_dshow_media_type(&this->type, type);
+
+    return S_OK;
+}
+long WINAPI
+libAVPin_Disconnect(libAVPin *this)
+{
+    dshowdebug("libAVPin_Disconnect(%p)\n", this);
+    /* Breaks the connection and drops the reference ReceiveConnection took. */
+    if (this->filter->state != State_Stopped)
+        return VFW_E_NOT_STOPPED;
+    if (!this->connectedto)
+        return S_FALSE;
+    IPin_Release(this->connectedto);
+    this->connectedto = NULL;
+    return S_OK;
+}
+long WINAPI
+libAVPin_ConnectedTo(libAVPin *this, IPin **pin)
+{
+    dshowdebug("libAVPin_ConnectedTo(%p)\n", this);
+    /* Hand out the peer pin; the caller receives its own reference. */
+    if (!pin)
+        return E_POINTER;
+    if (!this->connectedto)
+        return VFW_E_NOT_CONNECTED;
+    IPin_AddRef(this->connectedto);
+    *pin = this->connectedto;
+
+    return S_OK;
+}
+long WINAPI
+libAVPin_ConnectionMediaType(libAVPin *this, AM_MEDIA_TYPE *type)
+{
+    dshowdebug("libAVPin_ConnectionMediaType(%p)\n", this);
+
+    if (!type)
+        return E_POINTER;
+    if (!this->connectedto)
+        return VFW_E_NOT_CONNECTED;
+    /* Give the caller a copy of the type stored by ReceiveConnection(). */
+    return ff_copy_dshow_media_type(type, &this->type);
+}
+long WINAPI
+libAVPin_QueryPinInfo(libAVPin *this, PIN_INFO *info)
+{
+    dshowdebug("libAVPin_QueryPinInfo(%p)\n", this);
+
+    if (!info)
+        return E_POINTER;
+    /* The filter pointer returned in *info carries its own reference. */
+    if (this->filter)
+        libAVFilter_AddRef(this->filter);
+
+    info->pFilter = (IBaseFilter *) this->filter;
+    info->dir     = PINDIR_INPUT;
+    wcscpy(info->achName, L"Capture");
+
+    return S_OK;
+}
+long WINAPI
+libAVPin_QueryDirection(libAVPin *this, PIN_DIRECTION *dir)
+{
+    dshowdebug("libAVPin_QueryDirection(%p)\n", this);
+    if (!dir)
+        return E_POINTER;
+    *dir = PINDIR_INPUT; /* this pin is always reported as an input */
+    return S_OK;
+}
+long WINAPI
+libAVPin_QueryId(libAVPin *this, wchar_t **id)
+{
+    dshowdebug("libAVPin_QueryId(%p)\n", this);
+    if (!id)
+        return E_POINTER;
+    /* The caller frees *id with CoTaskMemFree(), so the malloc-backed
+     * wcsdup() must not be used to allocate it. */
+    if ((*id = CoTaskMemAlloc(sizeof(L"libAV Pin"))))
+        wcscpy(*id, L"libAV Pin");
+    return *id ? S_OK : E_OUTOFMEMORY;
+}
+long WINAPI
+libAVPin_QueryAccept(libAVPin *this, const AM_MEDIA_TYPE *type)
+{
+    dshowdebug("libAVPin_QueryAccept(%p)\n", this);
+    return S_FALSE; /* type is not inspected here; ReceiveConnection() checks it */
+}
+long WINAPI
+libAVPin_EnumMediaTypes(libAVPin *this, IEnumMediaTypes **enumtypes)
+{
+    const AM_MEDIA_TYPE *type = NULL;
+    libAVEnumMediaTypes *new;
+    dshowdebug("libAVPin_EnumMediaTypes(%p)\n", this);
+    /* type is always NULL here, so the enumerator is created without one. */
+    if (!enumtypes)
+        return E_POINTER;
+    new = libAVEnumMediaTypes_Create(type);
+    if (!new)
+        return E_OUTOFMEMORY;
+
+    *enumtypes = (IEnumMediaTypes *) new;
+    return S_OK;
+}
+long WINAPI
+libAVPin_QueryInternalConnections(libAVPin *this, IPin **pin,
+                                  unsigned long *npin)
+{
+    dshowdebug("libAVPin_QueryInternalConnections(%p)\n", this);
+    return E_NOTIMPL; /* internal connections are not reported */
+}
+long WINAPI
+libAVPin_EndOfStream(libAVPin *this)
+{
+    dshowdebug("libAVPin_EndOfStream(%p)\n", this);
+    /* Nothing to do at end of stream; the call is only acknowledged. */
+    return S_OK;
+}
+long WINAPI
+libAVPin_BeginFlush(libAVPin *this)
+{
+    dshowdebug("libAVPin_BeginFlush(%p)\n", this);
+    /* No state is flushed here; the request is only acknowledged. */
+    return S_OK;
+}
+long WINAPI
+libAVPin_EndFlush(libAVPin *this)
+{
+    dshowdebug("libAVPin_EndFlush(%p)\n", this);
+    /* Counterpart of BeginFlush: nothing to restore, just acknowledge. */
+    return S_OK;
+}
+long WINAPI
+libAVPin_NewSegment(libAVPin *this, REFERENCE_TIME start, REFERENCE_TIME stop,
+                    double rate)
+{
+    dshowdebug("libAVPin_NewSegment(%p)\n", this);
+    /* start, stop and rate are ignored; the call is only acknowledged. */
+    return S_OK;
+}
+
+static int
+libAVPin_Setup(libAVPin *this, libAVFilter *filter)
+{
+    IPinVtbl *vtbl = this->vtbl;
+    IMemInputPinVtbl *imemvtbl;
+    /* Returns 1 on success, 0 when filter is NULL or allocation fails. */
+    if (!filter)
+        return 0;
+    /* The IMemInputPin vtable is allocated separately from the IPin one. */
+    imemvtbl = av_malloc(sizeof(IMemInputPinVtbl));
+    if (!imemvtbl)
+        return 0;
+    /* IMemInputPin methods, reached through this->imemvtbl. */
+    SETVTBL(imemvtbl, libAVMemInputPin, QueryInterface);
+    SETVTBL(imemvtbl, libAVMemInputPin, AddRef);
+    SETVTBL(imemvtbl, libAVMemInputPin, Release);
+    SETVTBL(imemvtbl, libAVMemInputPin, GetAllocator);
+    SETVTBL(imemvtbl, libAVMemInputPin, NotifyAllocator);
+    SETVTBL(imemvtbl, libAVMemInputPin, GetAllocatorRequirements);
+    SETVTBL(imemvtbl, libAVMemInputPin, Receive);
+    SETVTBL(imemvtbl, libAVMemInputPin, ReceiveMultiple);
+    SETVTBL(imemvtbl, libAVMemInputPin, ReceiveCanBlock);
+    /* NOTE(review): no matching free of imemvtbl is visible here - confirm. */
+    this->imemvtbl = imemvtbl;
+    /* IPin methods. */
+    SETVTBL(vtbl, libAVPin, QueryInterface);
+    SETVTBL(vtbl, libAVPin, AddRef);
+    SETVTBL(vtbl, libAVPin, Release);
+    SETVTBL(vtbl, libAVPin, Connect);
+    SETVTBL(vtbl, libAVPin, ReceiveConnection);
+    SETVTBL(vtbl, libAVPin, Disconnect);
+    SETVTBL(vtbl, libAVPin, ConnectedTo);
+    SETVTBL(vtbl, libAVPin, ConnectionMediaType);
+    SETVTBL(vtbl, libAVPin, QueryPinInfo);
+    SETVTBL(vtbl, libAVPin, QueryDirection);
+    SETVTBL(vtbl, libAVPin, QueryId);
+    SETVTBL(vtbl, libAVPin, QueryAccept);
+    SETVTBL(vtbl, libAVPin, EnumMediaTypes);
+    SETVTBL(vtbl, libAVPin, QueryInternalConnections);
+    SETVTBL(vtbl, libAVPin, EndOfStream);
+    SETVTBL(vtbl, libAVPin, BeginFlush);
+    SETVTBL(vtbl, libAVPin, EndFlush);
+    SETVTBL(vtbl, libAVPin, NewSegment);
+
+    this->filter = filter;
+
+    return 1;
+}
+DECLARE_CREATE(libAVPin, libAVPin_Setup(this, filter), libAVFilter *filter)
+DECLARE_DESTROY(libAVPin, nothing)
+
+/*****************************************************************************
+ * libAVMemInputPin
+ ****************************************************************************/
+long WINAPI
+libAVMemInputPin_QueryInterface(libAVMemInputPin *this, const GUID *riid,
+                                void **ppvObject)
+{
+    libAVPin *pin = (libAVPin *) ((uint8_t *) this - imemoffset);
+    dshowdebug("libAVMemInputPin_QueryInterface(%p)\n", this);
+    return libAVPin_QueryInterface(pin, riid, ppvObject); /* pin: this minus offsetof(imemvtbl) */
+}
+unsigned long WINAPI
+libAVMemInputPin_AddRef(libAVMemInputPin *this)
+{
+    libAVPin *pin = (libAVPin *) ((uint8_t *) this - imemoffset);
+    dshowdebug("libAVMemInputPin_AddRef(%p)\n", this);
+    return libAVPin_AddRef(pin); /* forwarded to the pin's own AddRef */
+}
+unsigned long WINAPI
+libAVMemInputPin_Release(libAVMemInputPin *this)
+{
+    libAVPin *pin = (libAVPin *) ((uint8_t *) this - imemoffset);
+    dshowdebug("libAVMemInputPin_Release(%p)\n", this);
+    return libAVPin_Release(pin); /* forwarded to the pin's own Release */
+}
+long WINAPI
+libAVMemInputPin_GetAllocator(libAVMemInputPin *this, IMemAllocator **alloc)
+{
+    dshowdebug("libAVMemInputPin_GetAllocator(%p)\n", this);
+    return VFW_E_NO_ALLOCATOR; /* *alloc is left untouched */
+}
+long WINAPI
+libAVMemInputPin_NotifyAllocator(libAVMemInputPin *this, IMemAllocator *alloc,
+                                 WINBOOL rdwr)
+{
+    dshowdebug("libAVMemInputPin_NotifyAllocator(%p)\n", this);
+    return S_OK; /* alloc and rdwr are ignored */
+}
+long WINAPI
+libAVMemInputPin_GetAllocatorRequirements(libAVMemInputPin *this,
+                                          ALLOCATOR_PROPERTIES *props)
+{
+    dshowdebug("libAVMemInputPin_GetAllocatorRequirements(%p)\n", this);
+    return E_NOTIMPL; /* no requirements are reported; props is not written */
+}
+long WINAPI
+libAVMemInputPin_Receive(libAVMemInputPin *this, IMediaSample *sample)
+{
+    libAVPin *pin = (libAVPin *) ((uint8_t *) this - imemoffset);
+    enum dshowDeviceType devtype = pin->filter->type;
+    void *priv_data;
+    uint8_t *buf;
+    int buf_size;
+    int index;
+    int64_t curtime;
+    /* Timestamps one sample and passes its data to the filter's callback. */
+    dshowdebug("libAVMemInputPin_Receive(%p)\n", this);
+
+    if (!sample)
+        return E_POINTER;
+
+    if (devtype == VideoDevice) {
+        /* PTS from video devices is unreliable: read the filter's clock. */
+        IReferenceClock *clock = pin->filter->clock;
+        IReferenceClock_GetTime(clock, &curtime);
+    } else {
+        int64_t dummy; /* end time of the sample, not used */
+        IMediaSample_GetTime(sample, &curtime, &dummy);
+        curtime += pin->filter->start_time;
+    }
+    /* Fetch the sample buffer; the results of these calls are not checked. */
+    buf_size = IMediaSample_GetActualDataLength(sample);
+    IMediaSample_GetPointer(sample, &buf);
+    priv_data = pin->filter->priv_data;
+    index = pin->filter->stream_index;
+
+    pin->filter->callback(priv_data, index, buf, buf_size, curtime);
+
+    return S_OK;
+}
+long WINAPI
+libAVMemInputPin_ReceiveMultiple(libAVMemInputPin *this,
+                                 IMediaSample **samples, long n, long *nproc)
+{
+    int i;
+    dshowdebug("libAVMemInputPin_ReceiveMultiple(%p)\n", this);
+    /* Feed each sample to Receive(); the individual results are not checked. */
+    for (i = 0; i < n; i++)
+        libAVMemInputPin_Receive(this, samples[i]);
+
+    *nproc = n; /* always reports all n samples as processed */
+    return S_OK;
+}
+long WINAPI
+libAVMemInputPin_ReceiveCanBlock(libAVMemInputPin *this)
+{
+    dshowdebug("libAVMemInputPin_ReceiveCanBlock(%p)\n", this);
+    /* S_FALSE is the promise that Receive() will not block. */
+    return S_FALSE;
+}
+
+void
+libAVMemInputPin_Destroy(libAVMemInputPin *this)
+{
+    libAVPin *pin = (libAVPin *) ((uint8_t *) this - imemoffset);
+    dshowdebug("libAVMemInputPin_Destroy(%p)\n", this);
+    libAVPin_Destroy(pin); /* void function: must not "return" an expression */
+}
diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index 2155db6ece..b8ca419b40 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c
@@ -29,8 +29,6 @@
  * Remove this when MinGW incorporates them. */
 #define HWND_MESSAGE                ((HWND)-3)
 
-#define BI_RGB                      0
-
 /* End of missing MinGW defines */
 
 struct vfw_ctx {

From a38a00eddf200e18c6cf3ba090beaa3b224bd114 Mon Sep 17 00:00:00 2001
From: Bobby Bingham <uhmmmm@gmail.com>
Date: Sat, 21 May 2011 16:46:11 +0200
Subject: [PATCH 175/830] libavfilter: vf_split from soc.

Committed by michael; for detailed authorship see the soc repo
---
 libavfilter/Makefile     |  1 +
 libavfilter/allfilters.c |  1 +
 libavfilter/vf_split.c   | 63 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+)
 create mode 100644 libavfilter/vf_split.c

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index de34089468..8ea3169841 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -52,6 +52,7 @@ OBJS-$(CONFIG_SETSAR_FILTER)                 += vf_aspect.o
 OBJS-$(CONFIG_SETTB_FILTER)                  += vf_settb.o
 OBJS-$(CONFIG_SHOWINFO_FILTER)               += vf_showinfo.o
 OBJS-$(CONFIG_SLICIFY_FILTER)                += vf_slicify.o
+OBJS-$(CONFIG_SPLIT_FILTER)                  += vf_split.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
 OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
 OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 1973602305..0b6487f540 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -68,6 +68,7 @@ void avfilter_register_all(void)
     REGISTER_FILTER (SETTB,       settb,       vf);
     REGISTER_FILTER (SHOWINFO,    showinfo,    vf);
     REGISTER_FILTER (SLICIFY,     slicify,     vf);
+    REGISTER_FILTER (SPLIT,       split,       vf);
     REGISTER_FILTER (TRANSPOSE,   transpose,   vf);
     REGISTER_FILTER (UNSHARP,     unsharp,     vf);
     REGISTER_FILTER (VFLIP,       vflip,       vf);
diff --git a/libavfilter/vf_split.c b/libavfilter/vf_split.c
new file mode 100644
index 0000000000..635c9284f8
--- /dev/null
+++ b/libavfilter/vf_split.c
@@ -0,0 +1,63 @@
+/*
+ * Video splitter
+ * copyright (c) 2007 Bobby Bingham
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avfilter.h"
+
+static void start_frame(AVFilterLink *link, AVFilterBufferRef *picref)
+{
+    avfilter_start_frame(link->dst->outputs[0], /* each output gets a new ref */
+                         avfilter_ref_buffer(picref, ~AV_PERM_WRITE));
+    avfilter_start_frame(link->dst->outputs[1], /* with AV_PERM_WRITE masked */
+                         avfilter_ref_buffer(picref, ~AV_PERM_WRITE));
+}
+
+static void end_frame(AVFilterLink *link)
+{
+    avfilter_end_frame(link->dst->outputs[0]);
+    avfilter_end_frame(link->dst->outputs[1]);
+    /* Both outputs have been ended: drop the input link's current buffer. */
+    avfilter_unref_buffer(link->cur_buf);
+}
+
+static void draw_slice(AVFilterLink *link, int y, int h, int slice_dir)
+{
+    avfilter_draw_slice(link->dst->outputs[0], y, h, slice_dir);
+    avfilter_draw_slice(link->dst->outputs[1], y, h, slice_dir);
+}
+
+AVFilter avfilter_vf_split =
+{
+    .name      = "split",
+    /* One video input pad, two video output pads ("default", "default2"). */
+    .inputs    = (AVFilterPad[]) {{ .name            = "default",
+                                    .type            = AVMEDIA_TYPE_VIDEO,
+                                    .get_video_buffer= avfilter_null_get_video_buffer,
+                                    .start_frame     = start_frame,
+                                    .draw_slice      = draw_slice,
+                                    .end_frame       = end_frame, },
+                                  { .name = NULL}},
+    .outputs   = (AVFilterPad[]) {{ .name            = "default",
+                                    .type            = AVMEDIA_TYPE_VIDEO, },
+                                  { .name            = "default2",
+                                    .type            = AVMEDIA_TYPE_VIDEO, },
+                                  { .name = NULL}},
+};
+

From 8dd12b077b9f6c8437f387195feaefa806f93957 Mon Sep 17 00:00:00 2001
From: Alok Ahuja <waveletcoeff@gmail.com>
Date: Sat, 23 Apr 2011 22:29:13 -0700
Subject: [PATCH 176/830] Create a stereo_mode metadata tag to specify the
 stereo 3d video layout using the StereoMode tag in a matroska/webm video
 track.

---
 libavformat/matroska.h    | 20 +++++++++++++++++++-
 libavformat/matroskaenc.c | 19 +++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/libavformat/matroska.h b/libavformat/matroska.h
index 0d0d0bcff4..57497ef7a3 100644
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h
@@ -111,7 +111,7 @@
 #define MATROSKA_ID_VIDEOPIXELCROPR 0x54DD
 #define MATROSKA_ID_VIDEODISPLAYUNIT 0x54B2
 #define MATROSKA_ID_VIDEOFLAGINTERLACED 0x9A
-#define MATROSKA_ID_VIDEOSTEREOMODE 0x53B9
+#define MATROSKA_ID_VIDEOSTEREOMODE 0x53B8
 #define MATROSKA_ID_VIDEOASPECTRATIO 0x54B3
 #define MATROSKA_ID_VIDEOCOLORSPACE 0x2EB524
 
@@ -218,6 +218,24 @@ typedef enum {
   MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP = 3,
 } MatroskaTrackEncodingCompAlgo;
 
+typedef enum {
+  MATROSKA_VIDEO_STEREOMODE_TYPE_MONO               = 0,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_LEFT_RIGHT         = 1,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTTOM_TOP         = 2,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_TOP_BOTTOM         = 3,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_RL    = 4,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_LR    = 5,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_RL = 6,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_LR = 7,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_RL = 8,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_LR = 9,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_CYAN_RED  = 10,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_RIGHT_LEFT         = 11,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_GREEN_MAG = 12,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_LR = 13,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_RL = 14,
+} MatroskaVideoStereoModeType;
+
 /*
  * Matroska Codec IDs, strings
  */
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 387cead8cd..5ac2fd4896 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -586,6 +586,25 @@ static int mkv_write_tracks(AVFormatContext *s)
                 // XXX: interlace flag?
                 put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELWIDTH , codec->width);
                 put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELHEIGHT, codec->height);
+                if ((tag = av_metadata_get(s->metadata, "stereo_mode", NULL, 0))) {
+                    uint8_t stereo_fmt = atoi(tag->value);
+                    int valid_fmt = 0;
+
+                    switch (mkv->mode) {
+                    case MODE_WEBM:
+                        if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_TYPE_TOP_BOTTOM
+                            || stereo_fmt == MATROSKA_VIDEO_STEREOMODE_TYPE_RIGHT_LEFT)
+                            valid_fmt = 1;
+                        break;
+                    case MODE_MATROSKAv2:
+                        if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_RL)
+                            valid_fmt = 1;
+                        break;
+                    }
+
+                    if (valid_fmt)
+                        put_ebml_uint (pb, MATROSKA_ID_VIDEOSTEREOMODE, stereo_fmt);
+                }
                 if (st->sample_aspect_ratio.num) {
                     int d_width = codec->width*av_q2d(st->sample_aspect_ratio);
                     put_ebml_uint(pb, MATROSKA_ID_VIDEODISPLAYWIDTH , d_width);

From 69a83da5d40833e20da07830eda8ecbc31735116 Mon Sep 17 00:00:00 2001
From: Kirill Gavrilov <gavr.mail@gmail.com>
Date: Sat, 21 May 2011 17:08:53 +0200
Subject: [PATCH 177/830] matroska: cosmetics

---
 libavformat/matroska.h    | 34 +++++++++++++++++-----------------
 libavformat/matroskaenc.c | 17 +++++++++--------
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/libavformat/matroska.h b/libavformat/matroska.h
index 57497ef7a3..b112351529 100644
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h
@@ -77,8 +77,8 @@
 #define MATROSKA_ID_TRACKNUMBER 0xD7
 #define MATROSKA_ID_TRACKUID   0x73C5
 #define MATROSKA_ID_TRACKTYPE  0x83
-#define MATROSKA_ID_TRACKAUDIO 0xE1
-#define MATROSKA_ID_TRACKVIDEO 0xE0
+#define MATROSKA_ID_TRACKVIDEO     0xE0
+#define MATROSKA_ID_TRACKAUDIO     0xE1
 #define MATROSKA_ID_CODECID    0x86
 #define MATROSKA_ID_CODECPRIVATE 0x63A2
 #define MATROSKA_ID_CODECNAME  0x258688
@@ -219,21 +219,21 @@ typedef enum {
 } MatroskaTrackEncodingCompAlgo;
 
 typedef enum {
-  MATROSKA_VIDEO_STEREOMODE_TYPE_MONO               = 0,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_LEFT_RIGHT         = 1,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTTOM_TOP         = 2,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_TOP_BOTTOM         = 3,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_RL    = 4,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_LR    = 5,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_RL = 6,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_LR = 7,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_RL = 8,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_LR = 9,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_CYAN_RED  = 10,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_RIGHT_LEFT         = 11,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_GREEN_MAG = 12,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_LR = 13,
-  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_RL = 14,
+  MATROSKA_VIDEO_STEREOMODE_MONO               = 0,
+  MATROSKA_VIDEO_STEREOMODE_LEFT_RIGHT         = 1,
+  MATROSKA_VIDEO_STEREOMODE_BOTTOM_TOP         = 2,
+  MATROSKA_VIDEO_STEREOMODE_TOP_BOTTOM         = 3,
+  MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_RL    = 4,
+  MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_LR    = 5,
+  MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_RL = 6,
+  MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_LR = 7,
+  MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_RL = 8,
+  MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_LR = 9,
+  MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_CYAN_RED  = 10,
+  MATROSKA_VIDEO_STEREOMODE_RIGHT_LEFT         = 11,
+  MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_GREEN_MAG = 12,
+  MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_LR = 13,
+  MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_RL = 14,
 } MatroskaVideoStereoModeType;
 
 /*
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 5ac2fd4896..564b7509c1 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -591,20 +591,21 @@ static int mkv_write_tracks(AVFormatContext *s)
                     int valid_fmt = 0;
 
                     switch (mkv->mode) {
-                    case MODE_WEBM:
-                        if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_TYPE_TOP_BOTTOM
-                            || stereo_fmt == MATROSKA_VIDEO_STEREOMODE_TYPE_RIGHT_LEFT)
+                        case MODE_WEBM:
+                            if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_TOP_BOTTOM
+                             || stereo_fmt == MATROSKA_VIDEO_STEREOMODE_RIGHT_LEFT)
                             valid_fmt = 1;
-                        break;
-                    case MODE_MATROSKAv2:
-                        if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_RL)
-                            valid_fmt = 1;
-                        break;
+                            break;
+                        case MODE_MATROSKAv2:
+                            if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_RL)
+                                valid_fmt = 1;
+                            break;
                     }
 
                     if (valid_fmt)
                         put_ebml_uint (pb, MATROSKA_ID_VIDEOSTEREOMODE, stereo_fmt);
                 }
+
                 if (st->sample_aspect_ratio.num) {
                     int d_width = codec->width*av_q2d(st->sample_aspect_ratio);
                     put_ebml_uint(pb, MATROSKA_ID_VIDEODISPLAYWIDTH , d_width);

From e6ec9212c543c77ab3ddab90ac52021cfbbdac17 Mon Sep 17 00:00:00 2001
From: Kirill Gavrilov <gavr.mail@gmail.com>
Date: Sat, 21 May 2011 17:14:14 +0200
Subject: [PATCH 178/830] matroska: switch stereo mode from int to string and
 add support in the demuxer too

---
 libavformat/matroska.h    |   5 ++
 libavformat/matroskadec.c | 121 +++++++++++++++++++++++++++++++++++++-
 libavformat/matroskaenc.c |  39 +++++++++++-
 3 files changed, 161 insertions(+), 4 deletions(-)

diff --git a/libavformat/matroska.h b/libavformat/matroska.h
index b112351529..48959772f1 100644
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h
@@ -79,6 +79,11 @@
 #define MATROSKA_ID_TRACKTYPE  0x83
 #define MATROSKA_ID_TRACKVIDEO     0xE0
 #define MATROSKA_ID_TRACKAUDIO     0xE1
+#define MATROSKA_ID_TRACKOPERATION 0xE2
+#define MATROSKA_ID_TRACKCOMBINEPLANES 0xE3
+#define MATROSKA_ID_TRACKPLANE         0xE4
+#define MATROSKA_ID_TRACKPLANEUID      0xE5
+#define MATROSKA_ID_TRACKPLANETYPE     0xE6
 #define MATROSKA_ID_CODECID    0x86
 #define MATROSKA_ID_CODECPRIVATE 0x63A2
 #define MATROSKA_ID_CODECNAME  0x258688
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 8f9340dbdc..90623bf89f 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -112,6 +112,7 @@ typedef struct {
     uint64_t pixel_width;
     uint64_t pixel_height;
     uint64_t fourcc;
+    uint64_t stereoMode;
 } MatroskaTrackVideo;
 
 typedef struct {
@@ -131,6 +132,16 @@ typedef struct {
     uint8_t *buf;
 } MatroskaTrackAudio;
 
+typedef struct {
+    uint64_t uid;
+    uint64_t type;
+} MatroskaTrackPlane;
+
+typedef struct {
+    EbmlList combine_planes;
+    /*EbmlList join_blocks;*/
+} MatroskaTrackOperation;
+
 typedef struct {
     uint64_t num;
     uint64_t uid;
@@ -145,6 +156,7 @@ typedef struct {
     uint64_t flag_forced;
     MatroskaTrackVideo video;
     MatroskaTrackAudio audio;
+    MatroskaTrackOperation operation;
     EbmlList encodings;
 
     AVStream *stream;
@@ -291,13 +303,13 @@ static EbmlSyntax matroska_track_video[] = {
     { MATROSKA_ID_VIDEOPIXELWIDTH,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
     { MATROSKA_ID_VIDEOPIXELHEIGHT,   EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
     { MATROSKA_ID_VIDEOCOLORSPACE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) },
+    { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_UINT, MATROSKA_VIDEO_STEREOMODE_MONO, offsetof(MatroskaTrackVideo,stereoMode) },
     { MATROSKA_ID_VIDEOPIXELCROPB,    EBML_NONE },
     { MATROSKA_ID_VIDEOPIXELCROPT,    EBML_NONE },
     { MATROSKA_ID_VIDEOPIXELCROPL,    EBML_NONE },
     { MATROSKA_ID_VIDEOPIXELCROPR,    EBML_NONE },
     { MATROSKA_ID_VIDEODISPLAYUNIT,   EBML_NONE },
     { MATROSKA_ID_VIDEOFLAGINTERLACED,EBML_NONE },
-    { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_NONE },
     { MATROSKA_ID_VIDEOASPECTRATIO,   EBML_NONE },
     { 0 }
 };
@@ -329,6 +341,22 @@ static EbmlSyntax matroska_track_encodings[] = {
     { 0 }
 };
 
+static EbmlSyntax matroska_track_plane[] = {
+    { MATROSKA_ID_TRACKPLANEUID,  EBML_UINT, 0, offsetof(MatroskaTrackPlane,uid) },
+    { MATROSKA_ID_TRACKPLANETYPE, EBML_UINT, 0, offsetof(MatroskaTrackPlane,type) },
+    { 0 }
+};
+
+static EbmlSyntax matroska_track_combine_planes[] = {
+    { MATROSKA_ID_TRACKPLANE, EBML_NEST, sizeof(MatroskaTrackPlane), offsetof(MatroskaTrackOperation,combine_planes), {.n=matroska_track_plane} },
+    { 0 }
+};
+
+static EbmlSyntax matroska_track_operation[] = {
+    { MATROSKA_ID_TRACKCOMBINEPLANES, EBML_NEST, 0, 0, {.n=matroska_track_combine_planes} },
+    { 0 }
+};
+
 static EbmlSyntax matroska_track[] = {
     { MATROSKA_ID_TRACKNUMBER,          EBML_UINT, 0, offsetof(MatroskaTrack,num) },
     { MATROSKA_ID_TRACKNAME,            EBML_UTF8, 0, offsetof(MatroskaTrack,name) },
@@ -343,6 +371,7 @@ static EbmlSyntax matroska_track[] = {
     { MATROSKA_ID_TRACKFLAGFORCED,      EBML_UINT, 0, offsetof(MatroskaTrack,flag_forced), {.u=0} },
     { MATROSKA_ID_TRACKVIDEO,           EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} },
     { MATROSKA_ID_TRACKAUDIO,           EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} },
+    { MATROSKA_ID_TRACKOPERATION,       EBML_NEST, 0, offsetof(MatroskaTrack,operation), {.n=matroska_track_operation} },
     { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} },
     { MATROSKA_ID_TRACKFLAGENABLED,     EBML_NONE },
     { MATROSKA_ID_TRACKFLAGLACING,      EBML_NONE },
@@ -1196,6 +1225,9 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
     EbmlList *chapters_list = &matroska->chapters;
     MatroskaChapter *chapters;
     MatroskaTrack *tracks;
+    EbmlList *combined_list;
+    MatroskaTrackPlane *planes;
+    char stereo_str[256];
     EbmlList *index_list;
     MatroskaIndex *index;
     int index_scale = 1;
@@ -1209,13 +1241,18 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
     /* First read the EBML header. */
     if (ebml_parse(matroska, ebml_syntax, &ebml)
         || ebml.version > EBML_VERSION       || ebml.max_size > sizeof(uint64_t)
-        || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 2) {
+        || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 3) {
         av_log(matroska->ctx, AV_LOG_ERROR,
                "EBML header using unsupported features\n"
                "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n",
                ebml.version, ebml.doctype, ebml.doctype_version);
         ebml_free(ebml_syntax, &ebml);
         return AVERROR_PATCHWELCOME;
+    } else if (ebml.doctype_version == 3) {
+        av_log(matroska->ctx, AV_LOG_WARNING,
+               "EBML header using unsupported features\n"
+               "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n",
+               ebml.version, ebml.doctype, ebml.doctype_version);
     }
     for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++)
         if (!strcmp(ebml.doctype, matroska_doctypes[i]))
@@ -1475,6 +1512,86 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
             st->need_parsing = AVSTREAM_PARSE_HEADERS;
             if (track->default_duration)
                 st->avg_frame_rate = av_d2q(1000000000.0/track->default_duration, INT_MAX);
+
+            /* restore stereo mode flag as metadata tag */
+            switch (track->video.stereoMode) {
+                case MATROSKA_VIDEO_STEREOMODE_LEFT_RIGHT:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "left_right", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_BOTTOM_TOP:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "bottom_top", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_TOP_BOTTOM:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "top_bottom", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_RL:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "checkerboard_rl", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_LR:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "checkerboard_lr", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_RL:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "row_interleaved_rl", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_LR:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "row_interleaved_lr", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_RL:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "col_interleaved_rl", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_LR:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "col_interleaved_lr", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_CYAN_RED:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "anaglyph_cyan_red", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_RIGHT_LEFT:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "right_left", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_GREEN_MAG:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "anaglyph_green_magenta", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_LR:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "block_lr", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_RL:
+                    av_metadata_set2(&st->metadata, "STEREO_MODE", "block_rl", 0);
+                    break;
+                case MATROSKA_VIDEO_STEREOMODE_MONO:
+                default:
+                    /**av_metadata_set2(&st->metadata, "STEREO_MODE", "mono", 0);*/
+                    break;
+            }
+
+            /* if we have virtual track - mark the real tracks */
+            combined_list = &track->operation.combine_planes;
+            planes = combined_list->elem;
+            for (int plane_id = 0; plane_id < combined_list->nb_elem; ++plane_id) {
+                switch (planes[plane_id].type) {
+                    case 0: {
+                        snprintf(stereo_str, sizeof(stereo_str), "left_%d", i);
+                        break;
+                    }
+                    case 1: {
+                        snprintf(stereo_str, sizeof(stereo_str), "right_%d", i);
+                        break;
+                    }
+                    case 2: {
+                        snprintf(stereo_str, sizeof(stereo_str), "background_%d", i);
+                        break;
+                    }
+                    default: {
+                        continue;
+                    }
+                }
+                for (int track_id = 0; track_id < matroska->tracks.nb_elem && track_id < i; ++track_id) {
+                    MatroskaTrack *check_track = &tracks[track_id];
+                    if (planes[plane_id].uid == check_track->uid) {
+                        av_metadata_set2(&s->streams[track_id]->metadata, "STEREO_MODE", stereo_str, 0);
+                        break;
+                    }
+                }
+            }
         } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
             st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
             st->codec->sample_rate = track->audio.out_samplerate;
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 564b7509c1..0fe760b684 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -586,10 +586,45 @@ static int mkv_write_tracks(AVFormatContext *s)
                 // XXX: interlace flag?
                 put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELWIDTH , codec->width);
                 put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELHEIGHT, codec->height);
-                if ((tag = av_metadata_get(s->metadata, "stereo_mode", NULL, 0))) {
-                    uint8_t stereo_fmt = atoi(tag->value);
+
+                if ((tag = av_metadata_get(st->metadata, "STEREO_MODE", NULL, 0)) ||
+                    (tag = av_metadata_get( s->metadata, "STEREO_MODE", NULL, 0))) {
+                    // save stereomode flag
+                    uint64_t stereo_fmt = -1;
                     int valid_fmt = 0;
 
+                    if (!strcmp(tag->value, "mono")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_MONO;
+                    } else if (!strcmp(tag->value, "left_right")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_LEFT_RIGHT;
+                    } else if (!strcmp(tag->value, "bottom_top")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_BOTTOM_TOP;
+                    } else if (!strcmp(tag->value, "top_bottom")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_TOP_BOTTOM;
+                    } else if (!strcmp(tag->value, "checkerboard_rl")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_RL;
+                    } else if (!strcmp(tag->value, "checkerboard_lr")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_LR;
+                    } else if (!strcmp(tag->value, "row_interleaved_rl")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_RL;
+                    } else if (!strcmp(tag->value, "row_interleaved_lr")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_LR;
+                    } else if (!strcmp(tag->value, "col_interleaved_rl")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_RL;
+                    } else if (!strcmp(tag->value, "col_interleaved_lr")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_LR;
+                    } else if (!strcmp(tag->value, "anaglyph_cyan_red")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_CYAN_RED;
+                    } else if (!strcmp(tag->value, "right_left")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_RIGHT_LEFT;
+                    } else if (!strcmp(tag->value, "anaglyph_green_magenta")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_GREEN_MAG;
+                    } else if (!strcmp(tag->value, "block_lr")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_LR;
+                    } else if (!strcmp(tag->value, "block_rl")) {
+                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_RL;
+                    }
+
                     switch (mkv->mode) {
                         case MODE_WEBM:
                             if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_TOP_BOTTOM

From 3758eb0eb96217c6968d47487533337f96aeecfb Mon Sep 17 00:00:00 2001
From: Vitor Sessak <vitor1001@gmail.com>
Date: Tue, 17 May 2011 18:26:01 +0200
Subject: [PATCH 179/830] dct32: port SSE 32-point DCT to YASM

---
 libavcodec/x86/Makefile      |   3 +-
 libavcodec/x86/dct32_sse.asm | 289 ++++++++++++++++++++++++++++++++++
 libavcodec/x86/dct32_sse.c   | 296 -----------------------------------
 3 files changed, 291 insertions(+), 297 deletions(-)
 create mode 100644 libavcodec/x86/dct32_sse.asm
 delete mode 100644 libavcodec/x86/dct32_sse.c

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 5f428501e3..ba664abb1e 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -1,6 +1,8 @@
 OBJS-$(CONFIG_MLP_DECODER)             += x86/mlpdsp.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
 
+YASM-OBJS-$(CONFIG_DCT)                += x86/dct32_sse.o
+
 YASM-OBJS-FFT-$(HAVE_AMD3DNOW)         += x86/fft_3dn.o
 YASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT)      += x86/fft_3dn2.o
 YASM-OBJS-FFT-$(HAVE_SSE)              += x86/fft_sse.o
@@ -54,4 +56,3 @@ OBJS-$(HAVE_MMX)                       += x86/dnxhd_mmx.o               \
                                           x86/mpegvideo_mmx.o           \
                                           x86/simple_idct_mmx.o         \
 
-MMX-OBJS-$(CONFIG_DCT)                 += x86/dct32_sse.o
diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm
new file mode 100644
index 0000000000..f6d5bcf844
--- /dev/null
+++ b/libavcodec/x86/dct32_sse.asm
@@ -0,0 +1,289 @@
+;******************************************************************************
+;* 32 point SSE-optimized DCT transform
+;* Copyright (c) 2010 Vitor Sessak
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+
+SECTION_RODATA 32
+
+align 32
+ps_cos_vec: dd   0.500603,  0.505471,  0.515447,  0.531043
+            dd   0.553104,  0.582935,  0.622504,  0.674808
+            dd  -1.169440, -0.972568, -0.839350, -0.744536
+            dd -10.190008, -3.407609, -2.057781, -1.484165
+            dd   0.502419,  0.522499,  0.566944,  0.646822
+            dd   0.788155,  1.060678,  1.722447,  5.101149
+            dd   0.509796,  0.601345,  0.899976,  2.562916
+            dd   1.000000,  1.000000,  1.306563,  0.541196
+            dd   1.000000,  0.707107,  1.000000, -0.707107
+
+
+ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000
+
+%macro BUTTERFLY 4
+    movaps %4, %1
+    subps  %1, %2
+    addps  %2, %4
+    mulps  %1, %3
+%endmacro
+
+%macro BUTTERFLY0 5
+    movaps %4, %1
+    shufps %1, %1, %5
+    xorps  %4, %2
+    addps  %1, %4
+    mulps  %1, %3
+%endmacro
+
+%macro BUTTERFLY2 4
+    BUTTERFLY0 %1, %2, %3, %4, 0x1b
+%endmacro
+
+%macro BUTTERFLY3 4
+    BUTTERFLY0 %1, %2, %3, %4, 0xb1
+%endmacro
+
+INIT_XMM
+section .text align=16
+; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
+cglobal dct32_float_sse, 2,3,8, out, in, tmp
+    ; pass 1
+
+    movaps      m0, [inq+0]
+    movaps      m1, [inq+112]
+    shufps      m1, m1, 0x1b
+    BUTTERFLY   m0, m1, [ps_cos_vec], m3
+
+    movaps      m7, [inq+64]
+    movaps      m4, [inq+48]
+    shufps      m4, m4, 0x1b
+    BUTTERFLY   m7,  m4, [ps_cos_vec+48], m3
+
+
+    ; pass 2
+    movaps      m2, [ps_cos_vec+64]
+    BUTTERFLY   m1, m4, m2, m3
+    movaps      [outq+48], m1
+    movaps      [outq+ 0], m4
+
+    ; pass 1
+    movaps      m1, [inq+16]
+    movaps      m6, [inq+96]
+    shufps      m6, m6, 0x1b
+    BUTTERFLY   m1, m6, [ps_cos_vec+16], m3
+
+    movaps      m4, [inq+80]
+    movaps      m5, [inq+32]
+    shufps      m5, m5, 0x1b
+    BUTTERFLY   m4, m5, [ps_cos_vec+32], m3
+
+    ; pass 2
+    BUTTERFLY   m0, m7, m2, m3
+
+    movaps      m2, [ps_cos_vec+80]
+    BUTTERFLY   m6, m5, m2, m3
+
+    BUTTERFLY   m1, m4, m2, m3
+
+    ; pass 3
+    movaps      m2, [ps_cos_vec+96]
+    shufps      m1, m1, 0x1b
+    BUTTERFLY   m0, m1, m2, m3
+    movaps      [outq+112], m0
+    movaps      [outq+ 96], m1
+
+    movaps      m0, [outq+0]
+    shufps      m5, m5, 0x1b
+    BUTTERFLY   m0, m5, m2, m3
+
+    movaps      m1, [outq+48]
+    shufps      m6, m6, 0x1b
+    BUTTERFLY   m1, m6, m2, m3
+    movaps      [outq+48], m1
+
+    shufps      m4, m4, 0x1b
+    BUTTERFLY   m7, m4, m2, m3
+
+    ; pass 4
+    movaps      m3, [ps_p1p1m1m1+0]
+    movaps      m2, [ps_cos_vec+112]
+
+    BUTTERFLY2  m5, m3, m2, m1
+
+    BUTTERFLY2  m0, m3, m2, m1
+    movaps      [outq+16], m0
+
+    BUTTERFLY2  m6, m3, m2, m1
+    movaps      [outq+32], m6
+
+    movaps      m0, [outq+48]
+    BUTTERFLY2  m0, m3, m2, m1
+    movaps      [outq+48], m0
+
+    BUTTERFLY2  m4, m3, m2, m1
+
+    BUTTERFLY2  m7, m3, m2, m1
+
+    movaps      m6, [outq+96]
+    BUTTERFLY2  m6, m3, m2, m1
+
+    movaps      m0, [outq+112]
+    BUTTERFLY2  m0, m3, m2, m1
+
+    ; pass 5
+    movaps      m2, [ps_cos_vec+128]
+    shufps      m3, m3, 0xcc
+
+    BUTTERFLY3  m5, m3, m2, m1
+    movaps      [outq+0], m5
+
+    movaps      m1, [outq+16]
+    BUTTERFLY3  m1, m3, m2, m5
+    movaps      [outq+16], m1
+
+    BUTTERFLY3  m4, m3, m2, m5
+    movaps      [outq+64], m4
+
+    BUTTERFLY3  m7, m3, m2, m5
+    movaps      [outq+80], m7
+
+    movaps      m5, [outq+32]
+    BUTTERFLY3  m5, m3, m2, m7
+    movaps      [outq+32], m5
+
+    movaps      m4, [outq+48]
+    BUTTERFLY3  m4, m3, m2, m7
+    movaps      [outq+48], m4
+
+    BUTTERFLY3  m6, m3, m2, m7
+    movaps      [outq+96], m6
+
+    BUTTERFLY3  m0, m3, m2, m7
+    movaps      [outq+112], m0
+
+
+    ;    pass 6, no SIMD...
+    movss         m3, [outq+56]
+    mov         tmpd, [outq+4]
+    addss         m3, [outq+60]
+    movss         m7, [outq+72]
+    addss         m4, m3
+    movss         m2, [outq+52]
+    addss         m2, m3
+    movss         m3, [outq+24]
+    addss         m3, [outq+28]
+    addss         m7, [outq+76]
+    addss         m1, m3
+    addss         m5, m4
+    movss [outq+ 16], m1
+    movss         m1, [outq+20]
+    addss         m1, m3
+    movss         m3, [outq+40]
+    movss [outq+ 48], m1
+    addss         m3, [outq+44]
+    movss         m1, [outq+20]
+    addss         m4, m3
+    addss         m3, m2
+    addss         m1, [outq+28]
+    movss [outq+ 40], m3
+    addss         m2, [outq+36]
+    movss         m3, [outq+8]
+    movss [outq+ 56], m2
+    addss         m3, [outq+12]
+    movss [outq+  8], m5
+    movss [outq+ 32], m3
+    movss         m2, [outq+52]
+    movss         m3, [outq+80]
+    movss         m5, [outq+120]
+    movss [outq+ 80], m1
+    movss [outq+ 24], m4
+    addss         m5, [outq+124]
+    movss         m1, [outq+64]
+    addss         m2, [outq+60]
+    addss         m0, m5
+    addss         m5, [outq+116]
+    mov    [outq+64], tmpd
+    addss         m6, m0
+    addss         m1, m6
+    mov         tmpd, [outq+12]
+    movss [outq+  4], m1
+    movss         m1, [outq+88]
+    mov   [outq+ 96], tmpd
+    addss         m1, [outq+92]
+    movss         m4, [outq+104]
+    mov         tmpd, [outq+28]
+    addss         m4, [outq+108]
+    addss         m0, m4
+    addss         m3, m1
+    addss         m1, [outq+84]
+    addss         m4, m5
+    addss         m6, m3
+    addss         m3, m0
+    addss         m0, m7
+    addss         m5, [outq+100]
+    addss         m7, m4
+    mov   [outq+112], tmpd
+    movss [outq+ 28], m0
+    movss         m0, [outq+36]
+    movss [outq+ 36], m7
+    addss         m4, m1
+    movss         m7, [outq+116]
+    addss         m0, m2
+    addss         m7, [outq+124]
+    movss [outq+ 72], m0
+    movss         m0, [outq+44]
+    movss [outq+ 12], m6
+    movss [outq+ 20], m3
+    addss         m2, m0
+    movss [outq+ 44], m4
+    movss [outq+ 88], m2
+    addss         m0, [outq+60]
+    mov         tmpd, [outq+60]
+    mov   [outq+120], tmpd
+    movss [outq+104], m0
+    addss         m1, m5
+    addss         m5, [outq+68]
+    movss  [outq+52], m1
+    movss  [outq+60], m5
+    movss         m1, [outq+68]
+    movss         m5, [outq+100]
+    addss         m5, m7
+    addss         m7, [outq+108]
+    addss         m1, m5
+    movss         m2, [outq+84]
+    addss         m2, [outq+92]
+    addss         m5, m2
+    movss [outq+ 68], m1
+    addss         m2, m7
+    movss         m1, [outq+76]
+    movss [outq+ 84], m2
+    movss [outq+ 76], m5
+    movss         m2, [outq+108]
+    addss         m7, m1
+    addss         m2, [outq+124]
+    addss         m1, m2
+    addss         m2, [outq+92]
+    movss [outq+100], m1
+    movss [outq+108], m2
+    movss         m2, [outq+92]
+    movss [outq+ 92], m7
+    addss         m2, [outq+124]
+    movss [outq+116], m2
+    RET
diff --git a/libavcodec/x86/dct32_sse.c b/libavcodec/x86/dct32_sse.c
deleted file mode 100644
index 5303c6d5ef..0000000000
--- a/libavcodec/x86/dct32_sse.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * 32 point SSE-optimized DCT transform
- * Copyright (c) 2010 Vitor Sessak
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "libavutil/x86_cpu.h"
-#include "libavutil/mem.h"
-#include "libavcodec/dsputil.h"
-#include "fft.h"
-
-DECLARE_ALIGNED(16, static const float, b1)[] = {
-     0.500603,  0.505471,  0.515447,  0.531043,
-     0.553104,  0.582935,  0.622504,  0.674808,
-    -1.169440, -0.972568, -0.839350, -0.744536,
-   -10.190008, -3.407609, -2.057781, -1.484165,
-     0.502419,  0.522499,  0.566944,  0.646822,
-     0.788155,  1.060678,  1.722447,  5.101149,
-     0.509796,  0.601345,  0.899976,  2.562916,
-     1.000000,  1.000000,  1.306563,  0.541196,
-     1.000000,  0.707107,  1.000000, -0.707107
-};
-
-DECLARE_ALIGNED(16, static const int32_t, smask)[4] = {
-    0, 0, 0x80000000, 0x80000000
-};
-
-/* butterfly operator */
-#define BUTTERFLY(a,b,c,tmp)                            \
-    "movaps  %%" #a    ", %%" #tmp  "             \n\t" \
-    "subps   %%" #b    ", %%" #a    "             \n\t" \
-    "addps   %%" #tmp  ", %%" #b    "             \n\t" \
-    "mulps     " #c    ", %%" #a    "             \n\t"
-
-///* Same as BUTTERFLY when vectors a and b overlap */
-#define BUTTERFLY0(val, mask, cos, tmp, shuf)                            \
-    "movaps  %%" #val  ", %%" #tmp  "             \n\t"                  \
-    "shufps    " #shuf ", %%" #val  ",%%" #val "  \n\t"                  \
-    "xorps   %%" #mask ", %%" #tmp  "             \n\t" /* flip signs */ \
-    "addps   %%" #tmp  ", %%" #val  "             \n\t"                  \
-    "mulps   %%" #cos  ", %%" #val  "             \n\t"
-
-#define BUTTERFLY2(val, mask, cos, tmp) BUTTERFLY0(val, mask, cos, tmp, $0x1b)
-#define BUTTERFLY3(val, mask, cos, tmp) BUTTERFLY0(val, mask, cos, tmp, $0xb1)
-
-void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
-{
-    int32_t tmp1 = 0;
-    __asm__ volatile(
-        /* pass 1 */
-
-        "movaps    (%4), %%xmm0           \n\t"
-        "movaps 112(%4), %%xmm1           \n\t"
-        "shufps   $0x1b, %%xmm1, %%xmm1   \n\t"
-        BUTTERFLY(xmm0, xmm1, (%2), xmm3)
-
-        "movaps  64(%4), %%xmm7           \n\t"
-        "movaps  48(%4), %%xmm4           \n\t"
-        "shufps   $0x1b, %%xmm4, %%xmm4   \n\t"
-        BUTTERFLY(xmm7, xmm4, 48(%2), xmm3)
-
-
-        /* pass 2 */
-        "movaps  64(%2), %%xmm2           \n\t"
-        BUTTERFLY(xmm1, xmm4, %%xmm2, xmm3)
-        "movaps  %%xmm1, 48(%1)           \n\t"
-        "movaps  %%xmm4, (%1)             \n\t"
-
-        /* pass 1 */
-        "movaps  16(%4), %%xmm1           \n\t"
-        "movaps  96(%4), %%xmm6           \n\t"
-        "shufps   $0x1b, %%xmm6, %%xmm6   \n\t"
-        BUTTERFLY(xmm1, xmm6, 16(%2), xmm3)
-
-        "movaps  80(%4), %%xmm4           \n\t"
-        "movaps  32(%4), %%xmm5           \n\t"
-        "shufps   $0x1b, %%xmm5, %%xmm5   \n\t"
-        BUTTERFLY(xmm4, xmm5, 32(%2), xmm3)
-
-        /* pass 2 */
-        BUTTERFLY(xmm0, xmm7, %%xmm2, xmm3)
-
-        "movaps  80(%2), %%xmm2           \n\t"
-        BUTTERFLY(xmm6, xmm5, %%xmm2, xmm3)
-
-        BUTTERFLY(xmm1, xmm4, %%xmm2, xmm3)
-
-        /* pass 3 */
-        "movaps  96(%2), %%xmm2           \n\t"
-        "shufps   $0x1b, %%xmm1, %%xmm1   \n\t"
-        BUTTERFLY(xmm0, xmm1, %%xmm2, xmm3)
-        "movaps  %%xmm0, 112(%1)          \n\t"
-        "movaps  %%xmm1,  96(%1)          \n\t"
-
-        "movaps   0(%1), %%xmm0           \n\t"
-        "shufps   $0x1b, %%xmm5, %%xmm5   \n\t"
-        BUTTERFLY(xmm0, xmm5, %%xmm2, xmm3)
-
-        "movaps  48(%1), %%xmm1           \n\t"
-        "shufps   $0x1b, %%xmm6, %%xmm6   \n\t"
-        BUTTERFLY(xmm1, xmm6, %%xmm2, xmm3)
-        "movaps  %%xmm1,  48(%1)          \n\t"
-
-        "shufps   $0x1b, %%xmm4, %%xmm4   \n\t"
-        BUTTERFLY(xmm7, xmm4, %%xmm2, xmm3)
-
-        /* pass 4 */
-        "movaps    (%3), %%xmm3           \n\t"
-        "movaps 112(%2), %%xmm2           \n\t"
-
-        BUTTERFLY2(xmm5, xmm3, xmm2, xmm1)
-
-        BUTTERFLY2(xmm0, xmm3, xmm2, xmm1)
-        "movaps  %%xmm0, 16(%1)           \n\t"
-
-        BUTTERFLY2(xmm6, xmm3, xmm2, xmm1)
-        "movaps  %%xmm6, 32(%1)           \n\t"
-
-        "movaps  48(%1), %%xmm0           \n\t"
-        BUTTERFLY2(xmm0, xmm3, xmm2, xmm1)
-        "movaps  %%xmm0, 48(%1)           \n\t"
-
-        BUTTERFLY2(xmm4, xmm3, xmm2, xmm1)
-
-        BUTTERFLY2(xmm7, xmm3, xmm2, xmm1)
-
-        "movaps  96(%1), %%xmm6           \n\t"
-        BUTTERFLY2(xmm6, xmm3, xmm2, xmm1)
-
-        "movaps 112(%1), %%xmm0           \n\t"
-        BUTTERFLY2(xmm0, xmm3, xmm2, xmm1)
-
-        /* pass 5 */
-        "movaps 128(%2), %%xmm2           \n\t"
-        "shufps   $0xCC, %%xmm3,%%xmm3    \n\t"
-
-        BUTTERFLY3(xmm5, xmm3, xmm2, xmm1)
-        "movaps  %%xmm5, (%1)             \n\t"
-
-        "movaps  16(%1), %%xmm1           \n\t"
-        BUTTERFLY3(xmm1, xmm3, xmm2, xmm5)
-        "movaps  %%xmm1, 16(%1)           \n\t"
-
-        BUTTERFLY3(xmm4, xmm3, xmm2, xmm5)
-        "movaps  %%xmm4, 64(%1)           \n\t"
-
-        BUTTERFLY3(xmm7, xmm3, xmm2, xmm5)
-        "movaps  %%xmm7, 80(%1)           \n\t"
-
-        "movaps  32(%1), %%xmm5           \n\t"
-        BUTTERFLY3(xmm5, xmm3, xmm2, xmm7)
-        "movaps  %%xmm5, 32(%1)           \n\t"
-
-        "movaps  48(%1), %%xmm4           \n\t"
-        BUTTERFLY3(xmm4, xmm3, xmm2, xmm7)
-        "movaps  %%xmm4, 48(%1)           \n\t"
-
-        BUTTERFLY3(xmm6, xmm3, xmm2, xmm7)
-        "movaps  %%xmm6, 96(%1)           \n\t"
-
-        BUTTERFLY3(xmm0, xmm3, xmm2, xmm7)
-        "movaps  %%xmm0, 112(%1)          \n\t"
-
-
-        /* pass 6, no SIMD... */
-        "movss    56(%1),  %%xmm3           \n\t"
-        "movl      4(%1),      %0           \n\t"
-        "addss    60(%1),  %%xmm3           \n\t"
-        "movss    72(%1),  %%xmm7           \n\t"
-        "addss    %%xmm3,  %%xmm4           \n\t"
-        "movss    52(%1),  %%xmm2           \n\t"
-        "addss    %%xmm3,  %%xmm2           \n\t"
-        "movss    24(%1),  %%xmm3           \n\t"
-        "addss    28(%1),  %%xmm3           \n\t"
-        "addss    76(%1),  %%xmm7           \n\t"
-        "addss    %%xmm3,  %%xmm1           \n\t"
-        "addss    %%xmm4,  %%xmm5           \n\t"
-        "movss    %%xmm1,  16(%1)           \n\t"
-        "movss    20(%1),  %%xmm1           \n\t"
-        "addss    %%xmm3,  %%xmm1           \n\t"
-        "movss    40(%1),  %%xmm3           \n\t"
-        "movss    %%xmm1,  48(%1)           \n\t"
-        "addss    44(%1),  %%xmm3           \n\t"
-        "movss    20(%1),  %%xmm1           \n\t"
-        "addss    %%xmm3,  %%xmm4           \n\t"
-        "addss    %%xmm2,  %%xmm3           \n\t"
-        "addss    28(%1),  %%xmm1           \n\t"
-        "movss    %%xmm3,  40(%1)           \n\t"
-        "addss    36(%1),  %%xmm2           \n\t"
-        "movss     8(%1),  %%xmm3           \n\t"
-        "movss    %%xmm2,  56(%1)           \n\t"
-        "addss    12(%1),  %%xmm3           \n\t"
-        "movss    %%xmm5,   8(%1)           \n\t"
-        "movss    %%xmm3,  32(%1)           \n\t"
-        "movss    52(%1),  %%xmm2           \n\t"
-        "movss    80(%1),  %%xmm3           \n\t"
-        "movss   120(%1),  %%xmm5           \n\t"
-        "movss    %%xmm1,  80(%1)           \n\t"
-        "movss    %%xmm4,  24(%1)           \n\t"
-        "addss   124(%1),  %%xmm5           \n\t"
-        "movss    64(%1),  %%xmm1           \n\t"
-        "addss    60(%1),  %%xmm2           \n\t"
-        "addss    %%xmm5,  %%xmm0           \n\t"
-        "addss   116(%1),  %%xmm5           \n\t"
-        "movl         %0,  64(%1)           \n\t"
-        "addss    %%xmm0,  %%xmm6           \n\t"
-        "addss    %%xmm6,  %%xmm1           \n\t"
-        "movl     12(%1),      %0           \n\t"
-        "movss    %%xmm1,   4(%1)           \n\t"
-        "movss    88(%1),  %%xmm1           \n\t"
-        "movl         %0,  96(%1)           \n\t"
-        "addss    92(%1),  %%xmm1           \n\t"
-        "movss   104(%1),  %%xmm4           \n\t"
-        "movl     28(%1),      %0           \n\t"
-        "addss   108(%1),  %%xmm4           \n\t"
-        "addss    %%xmm4,  %%xmm0           \n\t"
-        "addss    %%xmm1,  %%xmm3           \n\t"
-        "addss    84(%1),  %%xmm1           \n\t"
-        "addss    %%xmm5,  %%xmm4           \n\t"
-        "addss    %%xmm3,  %%xmm6           \n\t"
-        "addss    %%xmm0,  %%xmm3           \n\t"
-        "addss    %%xmm7,  %%xmm0           \n\t"
-        "addss   100(%1),  %%xmm5           \n\t"
-        "addss    %%xmm4,  %%xmm7           \n\t"
-        "movl         %0, 112(%1)           \n\t"
-        "movss    %%xmm0,  28(%1)           \n\t"
-        "movss    36(%1),  %%xmm0           \n\t"
-        "movss    %%xmm7,  36(%1)           \n\t"
-        "addss    %%xmm1,  %%xmm4           \n\t"
-        "movss   116(%1),  %%xmm7           \n\t"
-        "addss    %%xmm2,  %%xmm0           \n\t"
-        "addss   124(%1),  %%xmm7           \n\t"
-        "movss    %%xmm0,  72(%1)           \n\t"
-        "movss    44(%1),  %%xmm0           \n\t"
-        "movss    %%xmm6,  12(%1)           \n\t"
-        "movss    %%xmm3,  20(%1)           \n\t"
-        "addss    %%xmm0,  %%xmm2           \n\t"
-        "movss    %%xmm4,  44(%1)           \n\t"
-        "movss    %%xmm2,  88(%1)           \n\t"
-        "addss    60(%1),  %%xmm0           \n\t"
-        "movl     60(%1),      %0           \n\t"
-        "movl         %0, 120(%1)           \n\t"
-        "movss    %%xmm0, 104(%1)           \n\t"
-        "addss    %%xmm5,  %%xmm1           \n\t"
-        "addss    68(%1),  %%xmm5           \n\t"
-        "movss    %%xmm1,  52(%1)           \n\t"
-        "movss    %%xmm5,  60(%1)           \n\t"
-        "movss    68(%1),  %%xmm1           \n\t"
-        "movss   100(%1),  %%xmm5           \n\t"
-        "addss    %%xmm7,  %%xmm5           \n\t"
-        "addss   108(%1),  %%xmm7           \n\t"
-        "addss    %%xmm5,  %%xmm1           \n\t"
-        "movss    84(%1),  %%xmm2           \n\t"
-        "addss    92(%1),  %%xmm2           \n\t"
-        "addss    %%xmm2,  %%xmm5           \n\t"
-        "movss    %%xmm1,  68(%1)           \n\t"
-        "addss    %%xmm7,  %%xmm2           \n\t"
-        "movss    76(%1),  %%xmm1           \n\t"
-        "movss    %%xmm2,  84(%1)           \n\t"
-        "movss    %%xmm5,  76(%1)           \n\t"
-        "movss   108(%1),  %%xmm2           \n\t"
-        "addss    %%xmm1,  %%xmm7           \n\t"
-        "addss   124(%1),  %%xmm2           \n\t"
-        "addss    %%xmm2,  %%xmm1           \n\t"
-        "addss    92(%1),  %%xmm2           \n\t"
-        "movss    %%xmm1, 100(%1)           \n\t"
-        "movss    %%xmm2, 108(%1)           \n\t"
-        "movss    92(%1),  %%xmm2           \n\t"
-        "movss    %%xmm7,  92(%1)           \n\t"
-        "addss   124(%1),  %%xmm2           \n\t"
-        "movss    %%xmm2, 116(%1)           \n\t"
-        :"+&r"(tmp1)
-        :"r"(out), "r"(b1), "r"(smask), "r"(in)
-        :XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3",
-                      "%xmm4", "%xmm5", "%xmm6", "%xmm7",)
-         "memory"
-        );
-}
-

From 4e653b98c888a922ee192c6c8f914dde6ea2dc40 Mon Sep 17 00:00:00 2001
From: Vitor Sessak <vitor1001@gmail.com>
Date: Sat, 14 May 2011 14:16:30 +0200
Subject: [PATCH 180/830] dct32: Change pass 6 permutation to allow for AVX
 implementation

---
 libavcodec/x86/dct32_sse.asm | 98 ++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm
index f6d5bcf844..fa0a502acf 100644
--- a/libavcodec/x86/dct32_sse.asm
+++ b/libavcodec/x86/dct32_sse.asm
@@ -156,7 +156,7 @@ cglobal dct32_float_sse, 2,3,8, out, in, tmp
 
     movaps      m1, [outq+16]
     BUTTERFLY3  m1, m3, m2, m5
-    movaps      [outq+16], m1
+    movaps      [outq+96], m1
 
     BUTTERFLY3  m4, m3, m2, m5
     movaps      [outq+64], m4
@@ -173,47 +173,46 @@ cglobal dct32_float_sse, 2,3,8, out, in, tmp
     movaps      [outq+48], m4
 
     BUTTERFLY3  m6, m3, m2, m7
-    movaps      [outq+96], m6
+    movaps      [outq+16], m6
 
     BUTTERFLY3  m0, m3, m2, m7
     movaps      [outq+112], m0
 
 
     ;    pass 6, no SIMD...
-    movss         m3, [outq+56]
     mov         tmpd, [outq+4]
-    addss         m3, [outq+60]
     movss         m7, [outq+72]
+    addss         m7, [outq+76]
+    movss         m3, [outq+56]
+    addss         m3, [outq+60]
     addss         m4, m3
     movss         m2, [outq+52]
     addss         m2, m3
-    movss         m3, [outq+24]
-    addss         m3, [outq+28]
-    addss         m7, [outq+76]
+    movss         m3, [outq+104]
+    addss         m3, [outq+108]
     addss         m1, m3
     addss         m5, m4
     movss [outq+ 16], m1
-    movss         m1, [outq+20]
+    movss         m1, [outq+100]
     addss         m1, m3
     movss         m3, [outq+40]
     movss [outq+ 48], m1
     addss         m3, [outq+44]
-    movss         m1, [outq+20]
+    movss         m1, [outq+100]
     addss         m4, m3
     addss         m3, m2
-    addss         m1, [outq+28]
+    addss         m1, [outq+108]
     movss [outq+ 40], m3
     addss         m2, [outq+36]
     movss         m3, [outq+8]
     movss [outq+ 56], m2
     addss         m3, [outq+12]
-    movss [outq+  8], m5
     movss [outq+ 32], m3
-    movss         m2, [outq+52]
     movss         m3, [outq+80]
-    movss         m5, [outq+120]
+    movss [outq+  8], m5
     movss [outq+ 80], m1
-    movss [outq+ 24], m4
+    movss         m2, [outq+52]
+    movss         m5, [outq+120]
     addss         m5, [outq+124]
     movss         m1, [outq+64]
     addss         m2, [outq+60]
@@ -223,67 +222,68 @@ cglobal dct32_float_sse, 2,3,8, out, in, tmp
     addss         m6, m0
     addss         m1, m6
     mov         tmpd, [outq+12]
-    movss [outq+  4], m1
-    movss         m1, [outq+88]
     mov   [outq+ 96], tmpd
-    addss         m1, [outq+92]
-    movss         m4, [outq+104]
-    mov         tmpd, [outq+28]
-    addss         m4, [outq+108]
-    addss         m0, m4
-    addss         m3, m1
-    addss         m1, [outq+84]
-    addss         m4, m5
+    movss [outq+  4], m1
+    movss         m1, [outq+24]
+    movss [outq+ 24], m4
+    movss         m4, [outq+88]
+    addss         m4, [outq+92]
+    addss         m3, m4
+    addss         m4, [outq+84]
+    mov         tmpd, [outq+108]
+    addss         m1, [outq+28]
+    addss         m0, m1
+    addss         m1, m5
     addss         m6, m3
     addss         m3, m0
     addss         m0, m7
-    addss         m5, [outq+100]
-    addss         m7, m4
+    addss         m5, [outq+20]
+    addss         m7, m1
+    movss [outq+ 12], m6
     mov   [outq+112], tmpd
+    movss         m6, [outq+28]
     movss [outq+ 28], m0
     movss         m0, [outq+36]
     movss [outq+ 36], m7
-    addss         m4, m1
+    addss         m1, m4
     movss         m7, [outq+116]
     addss         m0, m2
     addss         m7, [outq+124]
     movss [outq+ 72], m0
     movss         m0, [outq+44]
-    movss [outq+ 12], m6
-    movss [outq+ 20], m3
     addss         m2, m0
-    movss [outq+ 44], m4
+    movss [outq+ 44], m1
     movss [outq+ 88], m2
     addss         m0, [outq+60]
     mov         tmpd, [outq+60]
     mov   [outq+120], tmpd
     movss [outq+104], m0
-    addss         m1, m5
+    addss         m4, m5
     addss         m5, [outq+68]
-    movss  [outq+52], m1
+    movss  [outq+52], m4
     movss  [outq+60], m5
-    movss         m1, [outq+68]
-    movss         m5, [outq+100]
+    movss         m4, [outq+68]
+    movss         m5, [outq+20]
+    movss [outq+ 20], m3
     addss         m5, m7
-    addss         m7, [outq+108]
-    addss         m1, m5
+    addss         m7, m6
+    addss         m4, m5
     movss         m2, [outq+84]
     addss         m2, [outq+92]
     addss         m5, m2
-    movss [outq+ 68], m1
+    movss [outq+ 68], m4
     addss         m2, m7
-    movss         m1, [outq+76]
+    movss         m4, [outq+76]
     movss [outq+ 84], m2
     movss [outq+ 76], m5
-    movss         m2, [outq+108]
-    addss         m7, m1
-    addss         m2, [outq+124]
-    addss         m1, m2
-    addss         m2, [outq+92]
-    movss [outq+100], m1
-    movss [outq+108], m2
-    movss         m2, [outq+92]
-    movss [outq+ 92], m7
-    addss         m2, [outq+124]
-    movss [outq+116], m2
+    addss         m7, m4
+    addss         m6, [outq+124]
+    addss         m4, m6
+    addss         m6, [outq+92]
+    movss [outq+100], m4
+    movss [outq+108], m6
+    movss         m6, [outq+92]
+    movss  [outq+92], m7
+    addss         m6, [outq+124]
+    movss [outq+116], m6
     RET

From 6204feb160c843320f6001d7e2bb2361c82b90ca Mon Sep 17 00:00:00 2001
From: Vitor Sessak <vitor1001@gmail.com>
Date: Sat, 14 May 2011 14:17:15 +0200
Subject: [PATCH 181/830] dct32: Add AVX implementation of 32-point DCT

---
 libavcodec/mpegaudiodec.c    |   4 +-
 libavcodec/x86/dct32_sse.asm | 350 ++++++++++++++++++++++-------------
 libavcodec/x86/fft.c         |   4 +-
 libavcodec/x86/fft.h         |   1 +
 4 files changed, 232 insertions(+), 127 deletions(-)

diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 960d13d1e8..ccc93ad78a 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -69,9 +69,9 @@ typedef struct MPADecodeContext {
     uint32_t free_format_next_header;
     GetBitContext gb;
     GetBitContext in_gb;
-    DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
+    DECLARE_ALIGNED(32, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
     int synth_buf_offset[MPA_MAX_CHANNELS];
-    DECLARE_ALIGNED(16, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
+    DECLARE_ALIGNED(32, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
     INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
     GranuleDef granules[2][2]; /* Used in Layer 3 */
 #ifdef DEBUG
diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm
index fa0a502acf..2e1176cd84 100644
--- a/libavcodec/x86/dct32_sse.asm
+++ b/libavcodec/x86/dct32_sse.asm
@@ -20,31 +20,41 @@
 ;******************************************************************************
 
 %include "x86inc.asm"
+%include "config.asm"
 
 SECTION_RODATA 32
 
 align 32
 ps_cos_vec: dd   0.500603,  0.505471,  0.515447,  0.531043
             dd   0.553104,  0.582935,  0.622504,  0.674808
-            dd  -1.169440, -0.972568, -0.839350, -0.744536
             dd -10.190008, -3.407609, -2.057781, -1.484165
+            dd  -1.169440, -0.972568, -0.839350, -0.744536
             dd   0.502419,  0.522499,  0.566944,  0.646822
             dd   0.788155,  1.060678,  1.722447,  5.101149
             dd   0.509796,  0.601345,  0.899976,  2.562916
+            dd   0.509796,  0.601345,  0.899976,  2.562916
             dd   1.000000,  1.000000,  1.306563,  0.541196
+            dd   1.000000,  1.000000,  1.306563,  0.541196
+            dd   1.000000,  0.707107,  1.000000, -0.707107
             dd   1.000000,  0.707107,  1.000000, -0.707107
 
 
-ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000
+ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
 
-%macro BUTTERFLY 4
+%macro BUTTERFLY_SSE 4
     movaps %4, %1
     subps  %1, %2
     addps  %2, %4
     mulps  %1, %3
 %endmacro
 
-%macro BUTTERFLY0 5
+%macro BUTTERFLY_AVX 4
+    vsubps  %4, %1, %2
+    vaddps  %2, %2, %1
+    vmulps  %1, %4, %3
+%endmacro
+
+%macro BUTTERFLY0_SSE 5
     movaps %4, %1
     shufps %1, %1, %5
     xorps  %4, %2
@@ -52,6 +62,13 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000
     mulps  %1, %3
 %endmacro
 
+%macro BUTTERFLY0_AVX 5
+    vshufps %4, %1, %1, %5
+    vxorps  %1, %1, %2
+    vaddps  %4, %4, %1
+    vmulps  %1, %4, %3
+%endmacro
+
 %macro BUTTERFLY2 4
     BUTTERFLY0 %1, %2, %3, %4, 0x1b
 %endmacro
@@ -60,126 +77,7 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000
     BUTTERFLY0 %1, %2, %3, %4, 0xb1
 %endmacro
 
-INIT_XMM
-section .text align=16
-; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_sse, 2,3,8, out, in, tmp
-    ; pass 1
-
-    movaps      m0, [inq+0]
-    movaps      m1, [inq+112]
-    shufps      m1, m1, 0x1b
-    BUTTERFLY   m0, m1, [ps_cos_vec], m3
-
-    movaps      m7, [inq+64]
-    movaps      m4, [inq+48]
-    shufps      m4, m4, 0x1b
-    BUTTERFLY   m7,  m4, [ps_cos_vec+48], m3
-
-
-    ; pass 2
-    movaps      m2, [ps_cos_vec+64]
-    BUTTERFLY   m1, m4, m2, m3
-    movaps      [outq+48], m1
-    movaps      [outq+ 0], m4
-
-    ; pass 1
-    movaps      m1, [inq+16]
-    movaps      m6, [inq+96]
-    shufps      m6, m6, 0x1b
-    BUTTERFLY   m1, m6, [ps_cos_vec+16], m3
-
-    movaps      m4, [inq+80]
-    movaps      m5, [inq+32]
-    shufps      m5, m5, 0x1b
-    BUTTERFLY   m4, m5, [ps_cos_vec+32], m3
-
-    ; pass 2
-    BUTTERFLY   m0, m7, m2, m3
-
-    movaps      m2, [ps_cos_vec+80]
-    BUTTERFLY   m6, m5, m2, m3
-
-    BUTTERFLY   m1, m4, m2, m3
-
-    ; pass 3
-    movaps      m2, [ps_cos_vec+96]
-    shufps      m1, m1, 0x1b
-    BUTTERFLY   m0, m1, m2, m3
-    movaps      [outq+112], m0
-    movaps      [outq+ 96], m1
-
-    movaps      m0, [outq+0]
-    shufps      m5, m5, 0x1b
-    BUTTERFLY   m0, m5, m2, m3
-
-    movaps      m1, [outq+48]
-    shufps      m6, m6, 0x1b
-    BUTTERFLY   m1, m6, m2, m3
-    movaps      [outq+48], m1
-
-    shufps      m4, m4, 0x1b
-    BUTTERFLY   m7, m4, m2, m3
-
-    ; pass 4
-    movaps      m3, [ps_p1p1m1m1+0]
-    movaps      m2, [ps_cos_vec+112]
-
-    BUTTERFLY2  m5, m3, m2, m1
-
-    BUTTERFLY2  m0, m3, m2, m1
-    movaps      [outq+16], m0
-
-    BUTTERFLY2  m6, m3, m2, m1
-    movaps      [outq+32], m6
-
-    movaps      m0, [outq+48]
-    BUTTERFLY2  m0, m3, m2, m1
-    movaps      [outq+48], m0
-
-    BUTTERFLY2  m4, m3, m2, m1
-
-    BUTTERFLY2  m7, m3, m2, m1
-
-    movaps      m6, [outq+96]
-    BUTTERFLY2  m6, m3, m2, m1
-
-    movaps      m0, [outq+112]
-    BUTTERFLY2  m0, m3, m2, m1
-
-    ; pass 5
-    movaps      m2, [ps_cos_vec+128]
-    shufps      m3, m3, 0xcc
-
-    BUTTERFLY3  m5, m3, m2, m1
-    movaps      [outq+0], m5
-
-    movaps      m1, [outq+16]
-    BUTTERFLY3  m1, m3, m2, m5
-    movaps      [outq+96], m1
-
-    BUTTERFLY3  m4, m3, m2, m5
-    movaps      [outq+64], m4
-
-    BUTTERFLY3  m7, m3, m2, m5
-    movaps      [outq+80], m7
-
-    movaps      m5, [outq+32]
-    BUTTERFLY3  m5, m3, m2, m7
-    movaps      [outq+32], m5
-
-    movaps      m4, [outq+48]
-    BUTTERFLY3  m4, m3, m2, m7
-    movaps      [outq+48], m4
-
-    BUTTERFLY3  m6, m3, m2, m7
-    movaps      [outq+16], m6
-
-    BUTTERFLY3  m0, m3, m2, m7
-    movaps      [outq+112], m0
-
-
-    ;    pass 6, no SIMD...
+%macro PASS6_AND_PERMUTE 0
     mov         tmpd, [outq+4]
     movss         m7, [outq+72]
     addss         m7, [outq+76]
@@ -286,4 +184,208 @@ cglobal dct32_float_sse, 2,3,8, out, in, tmp
     movss  [outq+92], m7
     addss         m6, [outq+124]
     movss [outq+116], m6
+%endmacro
+
+%define BUTTERFLY  BUTTERFLY_AVX
+%define BUTTERFLY0 BUTTERFLY0_AVX
+
+INIT_YMM
+section .text align=16
+%ifdef HAVE_AVX
+; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
+cglobal dct32_float_avx, 2,3,8, out, in, tmp
+    ; pass 1
+    vmovaps     m4, [inq+0]
+    vinsertf128 m5, m5, [inq+96], 1
+    vinsertf128 m5, m5, [inq+112], 0
+    vshufps     m5, m5, m5, 0x1b
+    BUTTERFLY   m4, m5, [ps_cos_vec], m6
+
+    vmovaps     m2, [inq+64]
+    vinsertf128 m6, m6, [inq+32], 1
+    vinsertf128 m6, m6, [inq+48], 0
+    vshufps     m6, m6, m6, 0x1b
+    BUTTERFLY   m2, m6, [ps_cos_vec+32], m0
+
+    ; pass 2
+
+    BUTTERFLY  m5, m6, [ps_cos_vec+64], m0
+    BUTTERFLY  m4, m2, [ps_cos_vec+64], m7
+
+
+    ; pass 3
+    vperm2f128  m3, m6, m4, 0x31
+    vperm2f128  m1, m6, m4, 0x20
+    vshufps     m3, m3, m3, 0x1b
+
+    BUTTERFLY   m1, m3, [ps_cos_vec+96], m6
+
+
+    vperm2f128  m4, m5, m2, 0x20
+    vperm2f128  m5, m5, m2, 0x31
+    vshufps     m5, m5, m5, 0x1b
+
+    BUTTERFLY   m4, m5, [ps_cos_vec+96], m6
+
+    ; pass 4
+    vmovaps m6, [ps_p1p1m1m1+0]
+    vmovaps m2, [ps_cos_vec+128]
+
+    BUTTERFLY2  m5, m6, m2, m7
+    BUTTERFLY2  m4, m6, m2, m7
+    BUTTERFLY2  m1, m6, m2, m7
+    BUTTERFLY2  m3, m6, m2, m7
+
+
+    ; pass 5
+    vshufps m6, m6, m6, 0xcc
+    vmovaps m2, [ps_cos_vec+160]
+
+    BUTTERFLY3  m5, m6, m2, m7
+    BUTTERFLY3  m4, m6, m2, m7
+    BUTTERFLY3  m1, m6, m2, m7
+    BUTTERFLY3  m3, m6, m2, m7
+
+    vperm2f128  m6, m3, m3, 0x31
+    vmovaps [outq], m3
+
+    vextractf128  [outq+64], m5, 1
+    vextractf128  [outq+32], m5, 0
+
+    vextractf128  [outq+80], m4, 1
+    vextractf128  [outq+48], m4, 0
+
+    vperm2f128  m0, m1, m1, 0x31
+    vmovaps [outq+96], m1
+
+    vzeroupper
+
+    ;    pass 6, no SIMD...
+INIT_XMM
+    PASS6_AND_PERMUTE
+    RET
+%endif
+
+%define BUTTERFLY  BUTTERFLY_SSE
+%define BUTTERFLY0 BUTTERFLY0_SSE
+
+INIT_XMM
+; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
+cglobal dct32_float_sse, 2,3,8, out, in, tmp
+    ; pass 1
+
+    movaps      m0, [inq+0]
+    movaps      m1, [inq+112]
+    shufps      m1, m1, 0x1b
+    BUTTERFLY   m0, m1, [ps_cos_vec], m3
+
+    movaps      m7, [inq+64]
+    movaps      m4, [inq+48]
+    shufps      m4, m4, 0x1b
+    BUTTERFLY   m7, m4, [ps_cos_vec+32], m3
+
+    ; pass 2
+    movaps      m2, [ps_cos_vec+64]
+    BUTTERFLY   m1, m4, m2, m3
+    movaps      [outq+48], m1
+    movaps      [outq+ 0], m4
+
+    ; pass 1
+    movaps      m1, [inq+16]
+    movaps      m6, [inq+96]
+    shufps      m6, m6, 0x1b
+    BUTTERFLY   m1, m6, [ps_cos_vec+16], m3
+
+    movaps      m4, [inq+80]
+    movaps      m5, [inq+32]
+    shufps      m5, m5, 0x1b
+    BUTTERFLY   m4, m5, [ps_cos_vec+48], m3
+
+    ; pass 2
+    BUTTERFLY   m0, m7, m2, m3
+
+    movaps      m2, [ps_cos_vec+80]
+    BUTTERFLY   m6, m5, m2, m3
+
+    BUTTERFLY   m1, m4, m2, m3
+
+    ; pass 3
+    movaps      m2, [ps_cos_vec+96]
+    shufps      m1, m1, 0x1b
+    BUTTERFLY   m0, m1, m2, m3
+    movaps      [outq+112], m0
+    movaps      [outq+ 96], m1
+
+    movaps      m0, [outq+0]
+    shufps      m5, m5, 0x1b
+    BUTTERFLY   m0, m5, m2, m3
+
+    movaps      m1, [outq+48]
+    shufps      m6, m6, 0x1b
+    BUTTERFLY   m1, m6, m2, m3
+    movaps      [outq+48], m1
+
+    shufps      m4, m4, 0x1b
+    BUTTERFLY   m7, m4, m2, m3
+
+    ; pass 4
+    movaps      m3, [ps_p1p1m1m1+0]
+    movaps      m2, [ps_cos_vec+128]
+
+    BUTTERFLY2  m5, m3, m2, m1
+
+    BUTTERFLY2  m0, m3, m2, m1
+    movaps      [outq+16], m0
+
+    BUTTERFLY2  m6, m3, m2, m1
+    movaps      [outq+32], m6
+
+    movaps      m0, [outq+48]
+    BUTTERFLY2  m0, m3, m2, m1
+    movaps      [outq+48], m0
+
+    BUTTERFLY2  m4, m3, m2, m1
+
+    BUTTERFLY2  m7, m3, m2, m1
+
+    movaps      m6, [outq+96]
+    BUTTERFLY2  m6, m3, m2, m1
+
+    movaps      m0, [outq+112]
+    BUTTERFLY2  m0, m3, m2, m1
+
+    ; pass 5
+    movaps      m2, [ps_cos_vec+160]
+    shufps      m3, m3, 0xcc
+
+    BUTTERFLY3  m5, m3, m2, m1
+    movaps      [outq+0], m5
+
+    movaps      m1, [outq+16]
+    BUTTERFLY3  m1, m3, m2, m5
+    movaps      [outq+96], m1
+
+    BUTTERFLY3  m4, m3, m2, m5
+    movaps      [outq+64], m4
+
+    BUTTERFLY3  m7, m3, m2, m5
+    movaps      [outq+80], m7
+
+    movaps      m5, [outq+32]
+    BUTTERFLY3  m5, m3, m2, m7
+    movaps      [outq+32], m5
+
+    movaps      m4, [outq+48]
+    BUTTERFLY3  m4, m3, m2, m7
+    movaps      [outq+48], m4
+
+    BUTTERFLY3  m6, m3, m2, m7
+    movaps      [outq+16], m6
+
+    BUTTERFLY3  m0, m3, m2, m7
+    movaps      [outq+112], m0
+
+
+    ;    pass 6, no SIMD...
+    PASS6_AND_PERMUTE
     RET
diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c
index b29412c1dc..8eef4214a2 100644
--- a/libavcodec/x86/fft.c
+++ b/libavcodec/x86/fft.c
@@ -57,7 +57,9 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
 av_cold void ff_dct_init_mmx(DCTContext *s)
 {
     int has_vectors = av_get_cpu_flags();
-    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
+    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX)
+        s->dct32 = ff_dct32_float_avx;
+    else if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
         s->dct32 = ff_dct32_float_sse;
 }
 #endif
diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h
index c6379050d9..0ade2b2e7b 100644
--- a/libavcodec/x86/fft.h
+++ b/libavcodec/x86/fft.h
@@ -35,5 +35,6 @@ void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
 void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_dct32_float_sse(FFTSample *out, const FFTSample *in);
+void ff_dct32_float_avx(FFTSample *out, const FFTSample *in);
 
 #endif /* AVCODEC_X86_FFT_H */

From 71cc331cab8d61130048f3003f2ca77cfb94e3f3 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 21 May 2011 12:05:49 +0200
Subject: [PATCH 182/830] ffmpeg: get rid of the -vglobal option.

It's badly documented and does the same thing as -flags global_header,
so it's redundant.
---
 ffmpeg.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 71be22e05e..0c9545172f 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -208,7 +208,6 @@ static float audio_drift_threshold= 0.1;
 static int copy_ts= 0;
 static int copy_tb;
 static int opt_shortest = 0;
-static int video_global_header = 0;
 static char *vstats_filename;
 static FILE *vstats_file;
 static int opt_programid = 0;
@@ -3450,15 +3449,10 @@ static void new_video_stream(AVFormatContext *oc, int file_idx)
     if(video_codec_tag)
         video_enc->codec_tag= video_codec_tag;
 
-    if(   (video_global_header&1)
-       || (video_global_header==0 && (oc->oformat->flags & AVFMT_GLOBALHEADER))){
+    if(oc->oformat->flags & AVFMT_GLOBALHEADER) {
         video_enc->flags |= CODEC_FLAG_GLOBAL_HEADER;
         avcodec_opts[AVMEDIA_TYPE_VIDEO]->flags|= CODEC_FLAG_GLOBAL_HEADER;
     }
-    if(video_global_header&2){
-        video_enc->flags2 |= CODEC_FLAG2_LOCAL_HEADER;
-        avcodec_opts[AVMEDIA_TYPE_VIDEO]->flags2|= CODEC_FLAG2_LOCAL_HEADER;
-    }
 
     if (video_stream_copy) {
         st->stream_copy = 1;
@@ -4310,7 +4304,6 @@ static const OptionDef options[] = {
     { "vsync", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&video_sync_method}, "video sync method", "" },
     { "async", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&audio_sync_method}, "audio sync method", "" },
     { "adrift_threshold", HAS_ARG | OPT_FLOAT | OPT_EXPERT, {(void*)&audio_drift_threshold}, "audio drift threshold", "threshold" },
-    { "vglobal", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&video_global_header}, "video global header storage type", "" },
     { "copyts", OPT_BOOL | OPT_EXPERT, {(void*)&copy_ts}, "copy timestamps" },
     { "copytb", OPT_BOOL | OPT_EXPERT, {(void*)&copy_tb}, "copy input stream time base when stream copying" },
     { "shortest", OPT_BOOL | OPT_EXPERT, {(void*)&opt_shortest}, "finish encoding within shortest input" }, //

From a13fec8a9cbc13745bcb7e7c1b017d72623051ad Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ramiro.polla@gmail.com>
Date: Sat, 21 May 2011 21:53:15 +0200
Subject: [PATCH 183/830] DirectShow capture: Fix build

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavdevice/dshow.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavdevice/dshow.c b/libavdevice/dshow.c
index ef21af796a..901c766696 100644
--- a/libavdevice/dshow.c
+++ b/libavdevice/dshow.c
@@ -464,7 +464,7 @@ dshow_add_device(AVFormatContext *avctx, AVFormatParameters *ap,
             goto error;
         }
 
-        codec->codec_type  = CODEC_TYPE_AUDIO;
+        codec->codec_type  = AVMEDIA_TYPE_AUDIO;
         codec->sample_fmt  = sample_fmt_bits_per_sample(fx->wBitsPerSample);
         codec->codec_id    = waveform_codec_id(codec->sample_fmt);
         codec->sample_rate = fx->nSamplesPerSec;
@@ -634,7 +634,7 @@ static int dshow_read_packet(AVFormatContext *s, AVPacket *pkt)
     return pkt->size;
 }
 
-AVInputFormat dshow_demuxer = {
+AVInputFormat ff_dshow_demuxer = {
     "dshow",
     NULL_IF_CONFIG_SMALL("DirectShow capture"),
     sizeof(struct dshow_ctx),

From eb97d4d611efdb8480a5a8dcfbc35860b9eda638 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 22 May 2011 02:33:03 +0200
Subject: [PATCH 184/830] wmadec: avoid infinite loop.

Fixes ticket183

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/wmadec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c
index 3da1a60c19..840025d493 100644
--- a/libavcodec/wmadec.c
+++ b/libavcodec/wmadec.c
@@ -827,7 +827,7 @@ static int wma_decode_superframe(AVCodecContext *avctx,
         return 0;
     }
     if (buf_size < s->block_align)
-        return 0;
+        return AVERROR(EINVAL);
     buf_size = s->block_align;
 
     samples = data;

From 7d5297b3436623fe52f9424d0bc3ae03fbfe164d Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 22 May 2011 01:18:59 +0200
Subject: [PATCH 185/830] vf_split: fix various nits

---
 libavfilter/vf_split.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/libavfilter/vf_split.c b/libavfilter/vf_split.c
index 635c9284f8..2cab8fad1b 100644
--- a/libavfilter/vf_split.c
+++ b/libavfilter/vf_split.c
@@ -1,6 +1,5 @@
 /*
- * Video splitter
- * copyright (c) 2007 Bobby Bingham
+ * Copyright (c) 2007 Bobby Bingham
  *
  * This file is part of FFmpeg.
  *
@@ -19,32 +18,36 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+/**
+ * @file
+ * Video splitter
+ */
+
 #include "avfilter.h"
 
-static void start_frame(AVFilterLink *link, AVFilterBufferRef *picref)
+static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref)
 {
-    avfilter_start_frame(link->dst->outputs[0],
+    avfilter_start_frame(inlink->dst->outputs[0],
                          avfilter_ref_buffer(picref, ~AV_PERM_WRITE));
-    avfilter_start_frame(link->dst->outputs[1],
+    avfilter_start_frame(inlink->dst->outputs[1],
                          avfilter_ref_buffer(picref, ~AV_PERM_WRITE));
 }
 
-static void end_frame(AVFilterLink *link)
+static void end_frame(AVFilterLink *inlink)
 {
-    avfilter_end_frame(link->dst->outputs[0]);
-    avfilter_end_frame(link->dst->outputs[1]);
+    avfilter_end_frame(inlink->dst->outputs[0]);
+    avfilter_end_frame(inlink->dst->outputs[1]);
 
-    avfilter_unref_buffer(link->cur_buf);
+    avfilter_unref_buffer(inlink->cur_buf);
 }
 
-static void draw_slice(AVFilterLink *link, int y, int h, int slice_dir)
+static void draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir)
 {
-    avfilter_draw_slice(link->dst->outputs[0], y, h, slice_dir);
-    avfilter_draw_slice(link->dst->outputs[1], y, h, slice_dir);
+    avfilter_draw_slice(inlink->dst->outputs[0], y, h, slice_dir);
+    avfilter_draw_slice(inlink->dst->outputs[1], y, h, slice_dir);
 }
 
-AVFilter avfilter_vf_split =
-{
+AVFilter avfilter_vf_split = {
     .name      = "split",
 
     .inputs    = (AVFilterPad[]) {{ .name            = "default",
@@ -60,4 +63,3 @@ AVFilter avfilter_vf_split =
                                     .type            = AVMEDIA_TYPE_VIDEO, },
                                   { .name = NULL}},
 };
-

From 88fc2e411ecf66b8a8e1b41b027b51bf2b5434b0 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 22 May 2011 01:19:20 +0200
Subject: [PATCH 186/830] vf_split: add description

---
 libavfilter/vf_split.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavfilter/vf_split.c b/libavfilter/vf_split.c
index 2cab8fad1b..4ef959a5c5 100644
--- a/libavfilter/vf_split.c
+++ b/libavfilter/vf_split.c
@@ -49,6 +49,7 @@ static void draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir)
 
 AVFilter avfilter_vf_split = {
     .name      = "split",
+    .description = NULL_IF_CONFIG_SMALL("Pass on the input to two outputs."),
 
     .inputs    = (AVFilterPad[]) {{ .name            = "default",
                                     .type            = AVMEDIA_TYPE_VIDEO,

From de1100a00a483d967ed82c46792e1553b5bdc332 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 22 May 2011 01:20:53 +0200
Subject: [PATCH 187/830] vf_split: define draw_slice() before end_frame()

Improve logical coherence, fix nit.
---
 libavfilter/vf_split.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavfilter/vf_split.c b/libavfilter/vf_split.c
index 4ef959a5c5..75b9c6a877 100644
--- a/libavfilter/vf_split.c
+++ b/libavfilter/vf_split.c
@@ -33,6 +33,12 @@ static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref)
                          avfilter_ref_buffer(picref, ~AV_PERM_WRITE));
 }
 
+static void draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir)
+{
+    avfilter_draw_slice(inlink->dst->outputs[0], y, h, slice_dir);
+    avfilter_draw_slice(inlink->dst->outputs[1], y, h, slice_dir);
+}
+
 static void end_frame(AVFilterLink *inlink)
 {
     avfilter_end_frame(inlink->dst->outputs[0]);
@@ -41,12 +47,6 @@ static void end_frame(AVFilterLink *inlink)
     avfilter_unref_buffer(inlink->cur_buf);
 }
 
-static void draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir)
-{
-    avfilter_draw_slice(inlink->dst->outputs[0], y, h, slice_dir);
-    avfilter_draw_slice(inlink->dst->outputs[1], y, h, slice_dir);
-}
-
 AVFilter avfilter_vf_split = {
     .name      = "split",
     .description = NULL_IF_CONFIG_SMALL("Pass on the input to two outputs."),

From 6f5a145be17c1aeb61a0c7f2ef14a8ec537d7d46 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 22 May 2011 01:32:18 +0200
Subject: [PATCH 188/830] vf_split: give more meaningful names to the output
 pads

Rename "default" -> "output1", "default2" -> "output2".
---
 libavfilter/vf_split.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vf_split.c b/libavfilter/vf_split.c
index 75b9c6a877..cbebf264fa 100644
--- a/libavfilter/vf_split.c
+++ b/libavfilter/vf_split.c
@@ -58,9 +58,9 @@ AVFilter avfilter_vf_split = {
                                     .draw_slice      = draw_slice,
                                     .end_frame       = end_frame, },
                                   { .name = NULL}},
-    .outputs   = (AVFilterPad[]) {{ .name            = "default",
+    .outputs   = (AVFilterPad[]) {{ .name            = "output1",
                                     .type            = AVMEDIA_TYPE_VIDEO, },
-                                  { .name            = "default2",
+                                  { .name            = "output2",
                                     .type            = AVMEDIA_TYPE_VIDEO, },
                                   { .name = NULL}},
 };

From 8a5b71d0c3dbb0b1a6895ea858353e6fb41e9ab8 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 22 May 2011 01:32:40 +0200
Subject: [PATCH 189/830] vf_split: add documentation to filters.texi

---
 doc/filters.texi | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/doc/filters.texi b/doc/filters.texi
index 31a963d058..363a7f5584 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -1383,6 +1383,21 @@ not specified it will use the default value of 16.
 Adding this in the beginning of filter chains should make filtering
 faster due to better use of the memory cache.
 
+@section split
+
+Pass on the input video to two outputs. Both outputs are identical to
+the input video.
+
+For example:
+@example
+[in] split [splitout1][splitout2];
+[splitout1] crop=100:100:0:0    [cropout];
+[splitout2] pad=200:200:100:100 [padout];
+@end example
+
+will create two separate outputs from the same input, one cropped and
+one padded.
+
 @section transpose
 
 Transpose rows with columns in the input video and optionally flip it.

From af2ed4b7488468abc8ccdd1b45d810886a609e1e Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 22 May 2011 01:34:15 +0200
Subject: [PATCH 190/830] lavfi: bump minor and add changelog entry after the
 split filter addition

---
 Changelog              | 1 +
 libavfilter/avfilter.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Changelog b/Changelog
index 702f53f795..734396dfde 100644
--- a/Changelog
+++ b/Changelog
@@ -17,6 +17,7 @@ version <next>:
 - Apple Core Audio Format muxer
 - 9bit and 10bit H.264 decoding
 - 9bit and 10bit FFV1 encoding / decoding
+- split filter added
 
 
 version 0.7_beta1:
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 02f2ed2f77..4502f3e54a 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR  8
+#define LIBAVFILTER_VERSION_MINOR  9
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \

From 33adcdb53118df8f281742b75adf861cda64801a Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 22 May 2011 03:30:01 +0200
Subject: [PATCH 191/830] mpeg2dec: Fix lowres 3

Fixes ticket212

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/mpegvideo.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 0a0a11ebc9..d210865d29 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1722,8 +1722,10 @@ static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
     if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
         uvsx= (uvsx << 2) >> lowres;
         uvsy= (uvsy << 2) >> lowres;
-        pix_op[op_index](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
-        pix_op[op_index](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
+        if(h >> s->chroma_y_shift){
+            pix_op[op_index](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
+            pix_op[op_index](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
+        }
     }
     //FIXME h261 lowres loop filter
 }

From 165c7c420d611bfa16d999f2033619c542961926 Mon Sep 17 00:00:00 2001
From: Vitor Sessak <vitor1001@gmail.com>
Date: Sun, 22 May 2011 12:04:33 +0200
Subject: [PATCH 192/830] Fix dct32() compilation with --disable-yasm

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/x86/fft.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c
index 8eef4214a2..899f0f7ad5 100644
--- a/libavcodec/x86/fft.c
+++ b/libavcodec/x86/fft.c
@@ -56,11 +56,13 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
 #if CONFIG_DCT
 av_cold void ff_dct_init_mmx(DCTContext *s)
 {
+#if HAVE_YASM
     int has_vectors = av_get_cpu_flags();
     if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX)
         s->dct32 = ff_dct32_float_avx;
     else if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
         s->dct32 = ff_dct32_float_sse;
+#endif
 }
 #endif
 

From e6c1791b471e4dd03d8fd33d02307d9d9eae948d Mon Sep 17 00:00:00 2001
From: Vitor Sessak <vitor1001@gmail.com>
Date: Sun, 22 May 2011 13:41:13 +0200
Subject: [PATCH 193/830] Fix compilation with --disable-yasm.

---
 libavcodec/x86/fft.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c
index fb8329c011..18964d88ca 100644
--- a/libavcodec/x86/fft.c
+++ b/libavcodec/x86/fft.c
@@ -56,11 +56,13 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
 #if CONFIG_DCT
 av_cold void ff_dct_init_mmx(DCTContext *s)
 {
+#if HAVE_YASM
     int has_vectors = av_get_cpu_flags();
     if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX)
         s->dct32 = ff_dct32_float_avx;
     else if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
         s->dct32 = ff_dct32_float_sse;
+#endif
 }
 #endif
 

From 422b2362fc83ed3a75532ea68a6d167c52f447ec Mon Sep 17 00:00:00 2001
From: Loren Merritt <lorenm@u.washington.edu>
Date: Sat, 21 May 2011 23:36:23 +0200
Subject: [PATCH 194/830] dct32_sse: eliminate some spills

125->104 cycles on penryn (x86_64 only)
---
 libavcodec/x86/dct32_sse.asm  | 203 +++++++++++++++++++++++++---------
 libavcodec/x86/fmtconvert.asm |  13 +--
 libavcodec/x86/x86util.asm    |  20 ++++
 3 files changed, 176 insertions(+), 60 deletions(-)

diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm
index 2e1176cd84..bafe00289d 100644
--- a/libavcodec/x86/dct32_sse.asm
+++ b/libavcodec/x86/dct32_sse.asm
@@ -20,7 +20,7 @@
 ;******************************************************************************
 
 %include "x86inc.asm"
-%include "config.asm"
+%include "x86util.asm"
 
 SECTION_RODATA 32
 
@@ -37,8 +37,9 @@ ps_cos_vec: dd   0.500603,  0.505471,  0.515447,  0.531043
             dd   1.000000,  1.000000,  1.306563,  0.541196
             dd   1.000000,  0.707107,  1.000000, -0.707107
             dd   1.000000,  0.707107,  1.000000, -0.707107
+            dd   0.707107,  0.707107,  0.707107,  0.707107
 
-
+align 32
 ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
 
 %macro BUTTERFLY_SSE 4
@@ -77,6 +78,18 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
     BUTTERFLY0 %1, %2, %3, %4, 0xb1
 %endmacro
 
+%macro BUTTERFLY3V 5
+    movaps m%5, m%1
+    addps  m%1, m%2
+    subps  m%5, m%2
+    SWAP %2, %5
+    mulps  m%2, [ps_cos_vec+192]
+    movaps m%5, m%3
+    addps  m%3, m%4
+    subps  m%4, m%5
+    mulps  m%4, [ps_cos_vec+192]
+%endmacro
+
 %macro PASS6_AND_PERMUTE 0
     mov         tmpd, [outq+4]
     movss         m7, [outq+72]
@@ -269,9 +282,131 @@ INIT_XMM
 %define BUTTERFLY  BUTTERFLY_SSE
 %define BUTTERFLY0 BUTTERFLY0_SSE
 
+%ifdef ARCH_X86_64
+%define SPILL SWAP
+%define UNSPILL SWAP
+
+%macro PASS5 0
+    nop ; FIXME code alignment
+    SWAP 5, 8
+    SWAP 4, 12
+    SWAP 6, 14
+    SWAP 7, 13
+    SWAP 0, 15
+    PERMUTE 9,10, 10,12, 11,14, 12,9, 13,11, 14,13
+    TRANSPOSE4x4PS 8, 9, 10, 11, 0
+    BUTTERFLY3V    8, 9, 10, 11, 0
+    addps   m10, m11
+    TRANSPOSE4x4PS 12, 13, 14, 15, 0
+    BUTTERFLY3V    12, 13, 14, 15, 0
+    addps   m14, m15
+    addps   m12, m14
+    addps   m14, m13
+    addps   m13, m15
+%endmacro
+
+%macro PASS6 0
+    SWAP 9, 12
+    SWAP 11, 14
+    movss [outq+0x00], m8
+    pshuflw m0, m8, 0xe
+    movss [outq+0x10], m9
+    pshuflw m1, m9, 0xe
+    movss [outq+0x20], m10
+    pshuflw m2, m10, 0xe
+    movss [outq+0x30], m11
+    pshuflw m3, m11, 0xe
+    movss [outq+0x40], m12
+    pshuflw m4, m12, 0xe
+    movss [outq+0x50], m13
+    pshuflw m5, m13, 0xe
+    movss [outq+0x60], m14
+    pshuflw m6, m14, 0xe
+    movaps [outq+0x70], m15
+    pshuflw m7, m15, 0xe
+    addss   m0, m1
+    addss   m1, m2
+    movss [outq+0x08], m0
+    addss   m2, m3
+    movss [outq+0x18], m1
+    addss   m3, m4
+    movss [outq+0x28], m2
+    addss   m4, m5
+    movss [outq+0x38], m3
+    addss   m5, m6
+    movss [outq+0x48], m4
+    addss   m6, m7
+    movss [outq+0x58], m5
+    movss [outq+0x68], m6
+    movss [outq+0x78], m7
+
+    PERMUTE 1,8, 3,9, 5,10, 7,11, 9,12, 11,13, 13,14, 8,1, 10,3, 12,5, 14,7
+    movhlps m0, m1
+    pshufd  m1, m1, 3
+    SWAP 0, 2, 4, 6, 8, 10, 12, 14
+    SWAP 1, 3, 5, 7, 9, 11, 13, 15
+%rep 7
+    movhlps m0, m1
+    pshufd  m1, m1, 3
+    addss   m15, m1
+    SWAP 0, 2, 4, 6, 8, 10, 12, 14
+    SWAP 1, 3, 5, 7, 9, 11, 13, 15
+%endrep
+%assign i 4
+%rep 15
+    addss m0, m1
+    movss [outq+i], m0
+    SWAP 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+    %assign i i+8
+%endrep
+%endmacro
+
+%else ; ARCH_X86_32
+%macro SPILL 2 ; xmm#, mempos
+    movaps [outq+(%2-8)*16], m%1
+%endmacro
+%macro UNSPILL 2
+    movaps m%1, [outq+(%2-8)*16]
+%endmacro
+
+%define PASS6 PASS6_AND_PERMUTE
+%macro PASS5 0
+    movaps      m2, [ps_cos_vec+160]
+    shufps      m3, m3, 0xcc
+
+    BUTTERFLY3  m5, m3, m2, m1
+    SPILL 5, 8
+
+    UNSPILL 1, 9
+    BUTTERFLY3  m1, m3, m2, m5
+    SPILL 1, 14
+
+    BUTTERFLY3  m4, m3, m2, m5
+    SPILL 4, 12
+
+    BUTTERFLY3  m7, m3, m2, m5
+    SPILL 7, 13
+
+    UNSPILL 5, 10
+    BUTTERFLY3  m5, m3, m2, m7
+    SPILL 5, 10
+
+    UNSPILL 4, 11
+    BUTTERFLY3  m4, m3, m2, m7
+    SPILL 4, 11
+
+    BUTTERFLY3  m6, m3, m2, m7
+    SPILL 6, 9
+
+    BUTTERFLY3  m0, m3, m2, m7
+    SPILL 0, 15
+%endmacro
+%endif
+
+
 INIT_XMM
 ; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_sse, 2,3,8, out, in, tmp
+cglobal dct32_float_sse, 2,3,16, out, in, tmp
     ; pass 1
 
     movaps      m0, [inq+0]
@@ -287,8 +422,8 @@ cglobal dct32_float_sse, 2,3,8, out, in, tmp
     ; pass 2
     movaps      m2, [ps_cos_vec+64]
     BUTTERFLY   m1, m4, m2, m3
-    movaps      [outq+48], m1
-    movaps      [outq+ 0], m4
+    SPILL 1, 11
+    SPILL 4, 8
 
     ; pass 1
     movaps      m1, [inq+16]
@@ -313,17 +448,17 @@ cglobal dct32_float_sse, 2,3,8, out, in, tmp
     movaps      m2, [ps_cos_vec+96]
     shufps      m1, m1, 0x1b
     BUTTERFLY   m0, m1, m2, m3
-    movaps      [outq+112], m0
-    movaps      [outq+ 96], m1
+    SPILL 0, 15
+    SPILL 1, 14
 
-    movaps      m0, [outq+0]
+    UNSPILL 0, 8
     shufps      m5, m5, 0x1b
     BUTTERFLY   m0, m5, m2, m3
 
-    movaps      m1, [outq+48]
+    UNSPILL 1, 11
     shufps      m6, m6, 0x1b
     BUTTERFLY   m1, m6, m2, m3
-    movaps      [outq+48], m1
+    SPILL 1, 11
 
     shufps      m4, m4, 0x1b
     BUTTERFLY   m7, m4, m2, m3
@@ -335,57 +470,25 @@ cglobal dct32_float_sse, 2,3,8, out, in, tmp
     BUTTERFLY2  m5, m3, m2, m1
 
     BUTTERFLY2  m0, m3, m2, m1
-    movaps      [outq+16], m0
+    SPILL 0, 9
 
     BUTTERFLY2  m6, m3, m2, m1
-    movaps      [outq+32], m6
+    SPILL 6, 10
 
-    movaps      m0, [outq+48]
+    UNSPILL 0, 11
     BUTTERFLY2  m0, m3, m2, m1
-    movaps      [outq+48], m0
+    SPILL 0, 11
 
     BUTTERFLY2  m4, m3, m2, m1
 
     BUTTERFLY2  m7, m3, m2, m1
 
-    movaps      m6, [outq+96]
+    UNSPILL 6, 14
     BUTTERFLY2  m6, m3, m2, m1
 
-    movaps      m0, [outq+112]
+    UNSPILL 0, 15
     BUTTERFLY2  m0, m3, m2, m1
 
-    ; pass 5
-    movaps      m2, [ps_cos_vec+160]
-    shufps      m3, m3, 0xcc
-
-    BUTTERFLY3  m5, m3, m2, m1
-    movaps      [outq+0], m5
-
-    movaps      m1, [outq+16]
-    BUTTERFLY3  m1, m3, m2, m5
-    movaps      [outq+96], m1
-
-    BUTTERFLY3  m4, m3, m2, m5
-    movaps      [outq+64], m4
-
-    BUTTERFLY3  m7, m3, m2, m5
-    movaps      [outq+80], m7
-
-    movaps      m5, [outq+32]
-    BUTTERFLY3  m5, m3, m2, m7
-    movaps      [outq+32], m5
-
-    movaps      m4, [outq+48]
-    BUTTERFLY3  m4, m3, m2, m7
-    movaps      [outq+48], m4
-
-    BUTTERFLY3  m6, m3, m2, m7
-    movaps      [outq+16], m6
-
-    BUTTERFLY3  m0, m3, m2, m7
-    movaps      [outq+112], m0
-
-
-    ;    pass 6, no SIMD...
-    PASS6_AND_PERMUTE
+    PASS5
+    PASS6
     RET
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 13d6cc0130..efab87d570 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -95,13 +95,6 @@ FLOAT_TO_INT16_INTERLEAVE6 3dn2
 ; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
 ;-----------------------------------------------------------------------------
 
-%macro BUTTERFLYPS 3
-    movaps    m%3, m%1
-    unpcklps  m%1, m%2
-    unpckhps  m%3, m%2
-    SWAP %2, %3
-%endmacro
-
 %macro FLOAT_INTERLEAVE6 2
 cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5
 %ifdef ARCH_X86_64
@@ -130,9 +123,9 @@ cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5
     movaps    m4, [srcq+src4q]
     movaps    m5, [srcq+src5q]
 
-    BUTTERFLYPS 0, 1, 6
-    BUTTERFLYPS 2, 3, 6
-    BUTTERFLYPS 4, 5, 6
+    SBUTTERFLYPS 0, 1, 6
+    SBUTTERFLYPS 2, 3, 6
+    SBUTTERFLYPS 4, 5, 6
 
     movaps    m6, m4
     shufps    m4, m0, 0xe4
diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm
index 7bd985a33b..141e96000c 100644
--- a/libavcodec/x86/x86util.asm
+++ b/libavcodec/x86/x86util.asm
@@ -41,6 +41,13 @@
     SWAP %2, %4, %3
 %endmacro
 
+%macro SBUTTERFLYPS 3
+    movaps   m%3, m%1
+    unpcklps m%1, m%2
+    unpckhps m%3, m%2
+    SWAP %2, %3
+%endmacro
+
 %macro TRANSPOSE4x4B 5
     SBUTTERFLY bw, %1, %2, %5
     SBUTTERFLY bw, %3, %4, %5
@@ -74,6 +81,19 @@
     SWAP %2, %3
 %endmacro
 
+; identical behavior to TRANSPOSE4x4D, but using SSE1 float ops
+%macro TRANSPOSE4x4PS 5
+    SBUTTERFLYPS %1, %2, %5
+    SBUTTERFLYPS %3, %4, %5
+    movaps  m%5, m%1
+    movlhps m%1, m%3
+    movhlps m%3, m%5
+    movaps  m%5, m%2
+    movlhps m%2, m%4
+    movhlps m%4, m%5
+    SWAP %2, %3
+%endmacro
+
 %macro TRANSPOSE8x8W 9-11
 %ifdef ARCH_X86_64
     SBUTTERFLY wd,  %1, %2, %9

From 83db71977700d3337c84d5945ac8b7e7ee881ac2 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 22 May 2011 19:30:08 +0200
Subject: [PATCH 195/830] lavfi: make vsrc_buffer.h header public

Address trac issue #33.
---
 doc/APIchanges         | 3 +++
 libavfilter/Makefile   | 2 +-
 libavfilter/avfilter.h | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 710967c809..4ccd72b3c1 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-05-22 - xxxxxx - lavfi 2.10.0 - vsrc_buffer.h
+  Make libavfilter/vsrc_buffer.h public.
+
 2011-05-XX - XXXXXX - lavfi 2.8.0 - avcodec.h
   Add av_vsrc_buffer_add_frame() to libavfilter/avcodec.h.
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 8ea3169841..8130ee4d20 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -6,7 +6,7 @@ FFLIBS-$(CONFIG_MOVIE_FILTER) += avformat avcodec
 FFLIBS-$(CONFIG_SCALE_FILTER) += swscale
 FFLIBS-$(CONFIG_MP_FILTER) += avcodec
 
-HEADERS = avcodec.h avfilter.h avfiltergraph.h
+HEADERS = avcodec.h avfilter.h avfiltergraph.h vsrc_buffer.h
 
 OBJS = allfilters.o                                                     \
        avfilter.o                                                       \
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 1c4b8d6cb8..cee5bbc114 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR  9
+#define LIBAVFILTER_VERSION_MINOR 10
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \

From 5ecdfd008bce961c3241eaa1f8dc06e82a6b12db Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 22:09:34 +0200
Subject: [PATCH 196/830] lavf: deprecate avformat_alloc_output_context() in
 favor of avformat_alloc_output_context2()

The new function accepts a slightly more intuitive order of parameters,
and returns an error code, thus allowing applications to report a
meaningful error message.
---
 doc/APIchanges               |  4 ++++
 ffmpeg.c                     |  4 ++--
 libavformat/avformat.h       | 30 ++++++++++++++++++++++++++----
 libavformat/output-example.c |  4 ++--
 libavformat/utils.c          | 25 ++++++++++++++++++++++---
 libavformat/version.h        |  5 ++++-
 6 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 4ccd72b3c1..c7389241d5 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-05-22 - xxxxxx - lavf 53.2.0 - avformat.h
+  Introduce avformat_alloc_output_context2() and deprecate
+  avformat_alloc_output_context().
+
 2011-05-22 - xxxxxx - lavfi 2.10.0 - vsrc_buffer.h
   Make libavfilter/vsrc_buffer.h public.
 
diff --git a/ffmpeg.c b/ffmpeg.c
index b9d182cdf8..af57fc9456 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -3881,10 +3881,10 @@ static void opt_output_file(const char *filename)
     if (!strcmp(filename, "-"))
         filename = "pipe:";
 
-    oc = avformat_alloc_output_context(last_asked_format, NULL, filename);
+    err = avformat_alloc_output_context2(&oc, NULL, last_asked_format, filename);
     last_asked_format = NULL;
     if (!oc) {
-        print_error(filename, AVERROR(ENOMEM));
+        print_error(filename, err);
         ffmpeg_exit(1);
     }
     file_oformat= oc->oformat;
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 991b0a4fd5..fc74444e92 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1054,12 +1054,34 @@ int av_demuxer_open(AVFormatContext *ic, AVFormatParameters *ap);
  */
 AVFormatContext *avformat_alloc_context(void);
 
+#if FF_API_ALLOC_OUTPUT_CONTEXT
 /**
- * Allocate an AVFormatContext.
- * avformat_free_context() can be used to free the context and everything
- * allocated by the framework within it.
+ * @deprecated deprecated in favor of avformat_alloc_output_context2()
  */
-AVFormatContext *avformat_alloc_output_context(const char *format, AVOutputFormat *oformat, const char *filename);
+attribute_deprecated
+AVFormatContext *avformat_alloc_output_context(const char *format,
+                                               AVOutputFormat *oformat,
+                                               const char *filename);
+#endif
+
+/**
+ * Allocate an AVFormatContext for an output format.
+ * avformat_free_context() can be used to free the context and
+ * everything allocated by the framework within it.
+ *
+ * @param *ctx is set to the created format context, or to NULL in
+ * case of failure
+ * @param oformat format to use for allocating the context, if NULL
+ * format_name and filename are used instead
+ * @param format_name the name of output format to use for allocating the
+ * context, if NULL filename is used instead
+ * @param filename the name of the filename to use for allocating the
+ * context, may be NULL
+ * @return >= 0 in case of success, a negative AVERROR code in case of
+ * failure
+ */
+int avformat_alloc_output_context2(AVFormatContext **ctx, AVOutputFormat *oformat,
+                                   const char *format_name, const char *filename);
 
 /**
  * Read packets of a media file to get stream information. This
diff --git a/libavformat/output-example.c b/libavformat/output-example.c
index ac35ff023a..f174305fe6 100644
--- a/libavformat/output-example.c
+++ b/libavformat/output-example.c
@@ -443,10 +443,10 @@ int main(int argc, char **argv)
     filename = argv[1];
 
     /* allocate the output media context */
-    oc = avformat_alloc_output_context(NULL, NULL, filename);
+    avformat_alloc_output_context2(&oc, NULL, NULL, filename);
     if (!oc) {
         printf("Could not deduce output format from file extension: using MPEG.\n");
-        oc = avformat_alloc_output_context("mpeg", NULL, filename);
+        avformat_alloc_output_context2(&oc, NULL, "mpeg", filename);
     }
     if (!oc) {
         exit(1);
diff --git a/libavformat/utils.c b/libavformat/utils.c
index c5e570028e..71c325a542 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2751,8 +2751,13 @@ int av_set_parameters(AVFormatContext *s, AVFormatParameters *ap)
     return 0;
 }
 
-AVFormatContext *avformat_alloc_output_context(const char *format, AVOutputFormat *oformat, const char *filename){
+int avformat_alloc_output_context2(AVFormatContext **avctx, AVOutputFormat *oformat,
+                                   const char *format, const char *filename)
+{
     AVFormatContext *s= avformat_alloc_context();
+    int ret = 0;
+
+    *avctx = NULL;
     if(!s)
         goto nomem;
 
@@ -2761,11 +2766,13 @@ AVFormatContext *avformat_alloc_output_context(const char *format, AVOutputForma
             oformat = av_guess_format(format, NULL, NULL);
             if (!oformat) {
                 av_log(s, AV_LOG_ERROR, "Requested output format '%s' is not a suitable output format\n", format);
+                ret = AVERROR(EINVAL);
                 goto error;
             }
         } else {
             oformat = av_guess_format(NULL, filename, NULL);
             if (!oformat) {
+                ret = AVERROR(EINVAL);
                 av_log(s, AV_LOG_ERROR, "Unable to find a suitable output format for '%s'\n",
                         filename);
                 goto error;
@@ -2787,14 +2794,26 @@ AVFormatContext *avformat_alloc_output_context(const char *format, AVOutputForma
 
     if(filename)
         av_strlcpy(s->filename, filename, sizeof(s->filename));
-    return s;
+    *avctx = s;
+    return 0;
 nomem:
     av_log(s, AV_LOG_ERROR, "Out of memory\n");
+    ret = AVERROR(ENOMEM);
 error:
     avformat_free_context(s);
-    return NULL;
+    return ret;
 }
 
+#if FF_API_ALLOC_OUTPUT_CONTEXT
+AVFormatContext *avformat_alloc_output_context(const char *format,
+                                               AVOutputFormat *oformat, const char *filename)
+{
+    AVFormatContext *avctx;
+    int ret = avformat_alloc_output_context2(&avctx, oformat, format, filename);
+    return ret < 0 ? NULL : avctx;
+}
+#endif
+
 static int validate_codec_tag(AVFormatContext *s, AVStream *st)
 {
     const AVCodecTag *avctag;
diff --git a/libavformat/version.h b/libavformat/version.h
index fb4577af5f..76b86ed323 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -24,7 +24,7 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVFORMAT_VERSION_MAJOR 53
-#define LIBAVFORMAT_VERSION_MINOR  1
+#define LIBAVFORMAT_VERSION_MINOR  2
 #define LIBAVFORMAT_VERSION_MICRO  0
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
@@ -68,5 +68,8 @@
 #ifndef FF_API_SDP_CREATE
 #define FF_API_SDP_CREATE              (LIBAVFORMAT_VERSION_MAJOR < 54)
 #endif
+#ifndef FF_API_ALLOC_OUTPUT_CONTEXT
+#define FF_API_ALLOC_OUTPUT_CONTEXT    (LIBAVFORMAT_VERSION_MAJOR < 54)
+#endif
 
 #endif /* AVFORMAT_VERSION_H */

From 5045786b7ee235787766337a2f0b05955ff6c649 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 22:12:37 +0200
Subject: [PATCH 197/830] lavf: fix style for avformat_alloc_output_context2()

More consistent/readable.
---
 libavformat/utils.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index 71c325a542..52d8a2dc16 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2754,14 +2754,14 @@ int av_set_parameters(AVFormatContext *s, AVFormatParameters *ap)
 int avformat_alloc_output_context2(AVFormatContext **avctx, AVOutputFormat *oformat,
                                    const char *format, const char *filename)
 {
-    AVFormatContext *s= avformat_alloc_context();
+    AVFormatContext *s = avformat_alloc_context();
     int ret = 0;
 
     *avctx = NULL;
-    if(!s)
+    if (!s)
         goto nomem;
 
-    if(!oformat){
+    if (!oformat) {
         if (format) {
             oformat = av_guess_format(format, NULL, NULL);
             if (!oformat) {
@@ -2774,13 +2774,13 @@ int avformat_alloc_output_context2(AVFormatContext **avctx, AVOutputFormat *ofor
             if (!oformat) {
                 ret = AVERROR(EINVAL);
                 av_log(s, AV_LOG_ERROR, "Unable to find a suitable output format for '%s'\n",
-                        filename);
+                       filename);
                 goto error;
             }
         }
     }
 
-    s->oformat= oformat;
+    s->oformat = oformat;
     if (s->oformat->priv_data_size > 0) {
         s->priv_data = av_mallocz(s->oformat->priv_data_size);
         if (!s->priv_data)
@@ -2792,7 +2792,7 @@ int avformat_alloc_output_context2(AVFormatContext **avctx, AVOutputFormat *ofor
     } else
         s->priv_data = NULL;
 
-    if(filename)
+    if (filename)
         av_strlcpy(s->filename, filename, sizeof(s->filename));
     *avctx = s;
     return 0;

From 8089b7fa8c5b5a48cc7101daa4be891d0ead5a5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 22 May 2011 21:34:49 +0300
Subject: [PATCH 198/830] avoptions: Check the return value from av_get_number
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This avoids doing a division by zero if the option wasn't found,
or wasn't an option of an appropriate type.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavutil/opt.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavutil/opt.c b/libavutil/opt.c
index 57e3248a74..9e06b01c52 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -290,7 +290,8 @@ double av_get_double(void *obj, const char *name, const AVOption **o_out)
     double num=1;
     int den=1;
 
-    av_get_number(obj, name, o_out, &num, &den, &intnum);
+    if (av_get_number(obj, name, o_out, &num, &den, &intnum) < 0)
+        return -1;
     return num*intnum/den;
 }
 
@@ -300,7 +301,8 @@ AVRational av_get_q(void *obj, const char *name, const AVOption **o_out)
     double num=1;
     int den=1;
 
-    av_get_number(obj, name, o_out, &num, &den, &intnum);
+    if (av_get_number(obj, name, o_out, &num, &den, &intnum) < 0)
+        return (AVRational){-1, 0};
     if (num == 1.0 && (int)intnum == intnum)
         return (AVRational){intnum, den};
     else
@@ -313,7 +315,8 @@ int64_t av_get_int(void *obj, const char *name, const AVOption **o_out)
     double num=1;
     int den=1;
 
-    av_get_number(obj, name, o_out, &num, &den, &intnum);
+    if (av_get_number(obj, name, o_out, &num, &den, &intnum) < 0)
+        return -1;
     return num*intnum/den;
 }
 

From a121754852a69b4879a39ba78863404c13c54f61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 22 May 2011 14:40:33 +0300
Subject: [PATCH 199/830] ffmpeg: Don't trigger url_interrupt_cb on the first
 signal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, the url_interrupt_cb callback will abort all IO
after the first received signal. This makes the output files
from e.g. the mov muxer unreadable if the transcode is
aborted with ctrl+c.

After this patch, the first signal cleanly breaks out of
the transcoding loop, but won't forcibly abort all IO.
After the second signal is received, the url_interrupt_cb
callback will abort all IO.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 ffmpeg.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 0c9545172f..86732535c8 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -426,11 +426,13 @@ static void term_exit(void)
 }
 
 static volatile int received_sigterm = 0;
+static volatile int received_nb_signals = 0;
 
 static void
 sigterm_handler(int sig)
 {
     received_sigterm = sig;
+    received_nb_signals++;
     term_exit();
 }
 
@@ -445,7 +447,7 @@ static void term_init(void)
 
 static int decode_interrupt_cb(void)
 {
-    return received_sigterm;
+    return received_nb_signals > 1;
 }
 
 static int ffmpeg_exit(int ret)

From 94ea17075ba0751a4ac0f9fa0929956a18497b5a Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 22 May 2011 16:18:36 +0200
Subject: [PATCH 200/830] dct32: Replacing libav by ffmpeg in the license
 header with the authors permission. Signed-off-by: Michael Niedermayer
 <michaelni@gmx.at>

---
 libavcodec/x86/dct32_sse.asm | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm
index 2e1176cd84..d541164496 100644
--- a/libavcodec/x86/dct32_sse.asm
+++ b/libavcodec/x86/dct32_sse.asm
@@ -2,20 +2,20 @@
 ;* 32 point SSE-optimized DCT transform
 ;* Copyright (c) 2010 Vitor Sessak
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 

From a0cd98b5943a7807aff837f9adba1dd034ec2272 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 22 May 2011 19:06:10 +0200
Subject: [PATCH 201/830] Fix ticket127

This fixes detection of slightly too big dummy frames.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/h263dec.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index b56fd06516..bf6a856796 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -681,22 +681,18 @@ retry:
 frame_end:
     /* divx 5.01+ bistream reorder stuff */
     if(s->codec_id==CODEC_ID_MPEG4 && s->divx_packed){
-        int current_pos= get_bits_count(&s->gb)>>3;
+        int current_pos= s->gb.buffer == s->bitstream_buffer ? 0 : (get_bits_count(&s->gb)>>3);
         int startcode_found=0;
 
         if(buf_size - current_pos > 5){
             int i;
-            for(i=current_pos; i<buf_size-3; i++){
+            for(i=current_pos; i<buf_size-4; i++){
                 if(buf[i]==0 && buf[i+1]==0 && buf[i+2]==1 && buf[i+3]==0xB6){
-                    startcode_found=1;
+                    startcode_found=!(buf[i+4]&0x40);
                     break;
                 }
             }
         }
-        if(s->gb.buffer == s->bitstream_buffer && buf_size>7 && s->xvid_build>=0){ //xvid style
-            startcode_found=1;
-            current_pos=0;
-        }
 
         if(startcode_found){
             av_fast_malloc(

From f455f46455ae258d2d0ebc50a2e991fa020ba92d Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 22 May 2011 21:26:13 +0200
Subject: [PATCH 202/830] movdec: dont divide by zero when
 stts_data[0].duration = 0.

Fixes ticket223
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/mov.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 26e7a3a2df..726ec1e4b1 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1527,7 +1527,7 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
         int rescaled = sc->time_offset < 0 ? av_rescale(sc->time_offset, sc->time_scale, mov->time_scale) : sc->time_offset;
         current_dts = -rescaled;
         if (sc->ctts_data && sc->stts_data &&
-            sc->ctts_data[0].duration / sc->stts_data[0].duration > 16) {
+            sc->ctts_data[0].duration / FFMAX(sc->stts_data[0].duration, 1) > 16) {
             /* more than 16 frames delay, dts are likely wrong
                this happens with files created by iMovie */
             sc->wrong_dts = 1;

From 458f20bc75f9bf7615c65d30296071a69f1660e9 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 22 May 2011 21:28:28 +0200
Subject: [PATCH 203/830] libx264: specify field for default union values in
 options

Fix warnings.
---
 libavcodec/libx264.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 519dc511ec..d9bac17484 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -421,14 +421,14 @@ static av_cold int X264_init(AVCodecContext *avctx)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 
 static const AVOption options[] = {
-    {"preset", "Set the encoding preset", OFFSET(preset), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
-    {"tune", "Tune the encoding params", OFFSET(tune), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
-    {"fastfirstpass", "Use fast settings when encoding first pass", OFFSET(fastfirstpass), FF_OPT_TYPE_INT, 1, 0, 1, VE},
-    {"profile", "Set profile restrictions", OFFSET(profile), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
-    {"level", "Specify level (as defined by Annex A)", OFFSET(level), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
-    {"passlogfile", "Filename for 2 pass stats", OFFSET(stats), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
-    {"wpredp", "Weighted prediction for P-frames", OFFSET(weightp), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
-    {"x264opts", "x264 options", OFFSET(x264opts), FF_OPT_TYPE_STRING, 0, 0, 0, VE},
+    {"preset", "Set the encoding preset", OFFSET(preset), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
+    {"tune", "Tune the encoding params", OFFSET(tune), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
+    {"fastfirstpass", "Use fast settings when encoding first pass", OFFSET(fastfirstpass), FF_OPT_TYPE_INT, {.dbl=1}, 0, 1, VE},
+    {"profile", "Set profile restrictions", OFFSET(profile), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
+    {"level", "Specify level (as defined by Annex A)", OFFSET(level), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
+    {"passlogfile", "Filename for 2 pass stats", OFFSET(stats), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
+    {"wpredp", "Weighted prediction for P-frames", OFFSET(weightp), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
+    {"x264opts", "x264 options", OFFSET(x264opts), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
     { NULL },
 };
 

From 0753721ed1caab048b58db15ebfdf45569e83420 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Mon, 23 May 2011 01:23:33 +0200
Subject: [PATCH 204/830] Do not ask for samples if a specific channel layout
 was requested.

---
 libavcodec/mlpdec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index 50826d0ff5..b13d0795cd 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -340,6 +340,7 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
             m->avctx->channel_layout = ff_truehd_layout(mh.channels_thd_stream1);
         }
         if (m->avctx->channels &&
+            !m->avctx->request_channels && !m->avctx->request_channel_layout &&
             av_get_channel_layout_nb_channels(m->avctx->channel_layout) != m->avctx->channels) {
             m->avctx->channel_layout = 0;
             av_log_ask_for_sample(m->avctx, "Unknown channel layout.");

From 6465c820da7b104150366a8cdd837c00cf364235 Mon Sep 17 00:00:00 2001
From: Kieran Kunhya <kieran@kunhya.com>
Date: Mon, 23 May 2011 03:02:12 +0100
Subject: [PATCH 205/830] Fix 9/10 bit in swscale.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libswscale/swscale.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index f5c4e88688..3943aa0b72 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -233,7 +233,7 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
         } \
     }
     for (i = 0; i < dstW; i++) {
-        int val = 1 << 10;
+        int val = 1 << (26-output_bits);
         int j;
 
         for (j = 0; j < lumFilterSize; j++)
@@ -244,8 +244,8 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
 
     if (uDest) {
         for (i = 0; i < chrDstW; i++) {
-            int u = 1 << 10;
-            int v = 1 << 10;
+            int u = 1 << (26-output_bits);
+            int v = 1 << (26-output_bits);
             int j;
 
             for (j = 0; j < chrFilterSize; j++) {
@@ -260,7 +260,7 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
 
     if (CONFIG_SWSCALE_ALPHA && aDest) {
         for (i = 0; i < dstW; i++) {
-            int val = 1 << 10;
+            int val = 1 << (26-output_bits);
             int j;
 
             for (j = 0; j < lumFilterSize; j++)

From a10fb79070c017be613700b946f51baed4f69df0 Mon Sep 17 00:00:00 2001
From: Dave Yeo <dave.r.yeo@gmail.com>
Date: Sun, 22 May 2011 10:44:17 -0700
Subject: [PATCH 206/830] x86 asm: Add SECTION_TEXT to dct32_sse.asm.

This fixes the following error on OS/2:
error: segment name `.text align=16' not recognized

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/x86/dct32_sse.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm
index bafe00289d..46daa43d8c 100644
--- a/libavcodec/x86/dct32_sse.asm
+++ b/libavcodec/x86/dct32_sse.asm
@@ -203,7 +203,7 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
 %define BUTTERFLY0 BUTTERFLY0_AVX
 
 INIT_YMM
-section .text align=16
+SECTION_TEXT
 %ifdef HAVE_AVX
 ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
 cglobal dct32_float_avx, 2,3,8, out, in, tmp

From b47904d158709bdec1a9d40e83d1abadf50081dc Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Fri, 20 May 2011 00:39:12 +0300
Subject: [PATCH 207/830] h264: Properly set coded_{width, height} when parsing
 H.264.

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/h264.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 5fb303c82f..eb873a4855 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1914,6 +1914,9 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         s->avctx->sample_aspect_ratio= h->sps.sar;
         av_assert0(s->avctx->sample_aspect_ratio.den);
 
+        h->s.avctx->coded_width = 16*s->mb_width;
+        h->s.avctx->coded_height = 16*s->mb_height;
+
         if(h->sps.video_signal_type_present_flag){
             s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
             if(h->sps.colour_description_present_flag){

From 845807494b013a5429918f5d3252c343abcee315 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Mon, 23 May 2011 08:08:03 -0400
Subject: [PATCH 208/830] fate: update 9/10bit refs.

---
 tests/ref/lavfi/pixfmts_scale_le | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/ref/lavfi/pixfmts_scale_le b/tests/ref/lavfi/pixfmts_scale_le
index cbccb41cbd..37dce4f86c 100644
--- a/tests/ref/lavfi/pixfmts_scale_le
+++ b/tests/ref/lavfi/pixfmts_scale_le
@@ -27,12 +27,12 @@ uyvy422             314bd486277111a95d9369b944fa0400
 yuv410p             7df8f6d69b56a8dcb6c7ee908e5018b5
 yuv411p             1143e7c5cc28fe0922b051b17733bc4c
 yuv420p             fdad2d8df8985e3d17e73c71f713cb14
-yuv420p10be         5051128ca208d89595f7672b1707340b
-yuv420p10le         77895bf65e70ad2ca021702fff55c8fc
+yuv420p10be         6d335e75b553da590135cf8bb999610c
+yuv420p10le         d510ddbabefd03ef39ec943fcb51b709
 yuv420p16be         29a0265764530070f5cd3251cc01f66a
 yuv420p16le         6f3a265b084a78baec229238d9f7945f
-yuv420p9be          02de6b37dc8a631ce2367b535670c40c
-yuv420p9le          cddfbaf8e2a61aa5ea09fb396bcbc872
+yuv420p9be          ec4983b7a949c0472110a7a2c58e278a
+yuv420p9le          c136dce5913a722eee44ab72cff664b2
 yuv422p             918e37701ee7377d16a8a6c119c56a40
 yuv422p16be         ef3e865fc1d0c68977c735323c50af6e
 yuv422p16le         428a9b96214c09cb5a983ce36d6961ff

From b51021da7b69a98f135cf15bbe7f3ecb08daaa82 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Mon, 23 May 2011 08:44:31 -0400
Subject: [PATCH 209/830] fate: fix
 fate-h264-conformance-frext-pph10i4-panasonic-a crcs.

The sample on rsync was corrupt, this one is now bitexact w.r.t. JM.
---
 .../h264-conformance-frext-pph10i4_panasonic_a  | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a
index d351a7eb1f..a06c4577ff 100644
--- a/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a
+++ b/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a
@@ -4,16 +4,7 @@
 0, 10800, 6220800, 0x7459a1cc
 0, 14400, 6220800, 0x02191aa9
 0, 18000, 6220800, 0x88dca590
-0, 21600, 6220800, 0x56dd150a
-0, 25200, 6220800, 0x5f56a56f
-0, 28800, 6220800, 0x67ada4b7
-0, 32400, 6220800, 0x88dca590
-0, 36000, 6220800, 0xd3b09fe5
-0, 39600, 6220800, 0x2223998c
-0, 43200, 6220800, 0x5e5b2da5
-0, 46800, 6220800, 0x88dca590
-0, 50400, 6220800, 0x5e5b2da5
-0, 54000, 6220800, 0x88dca590
-0, 57600, 6220800, 0x5e5b2da5
-0, 61200, 6220800, 0x88dca590
-0, 64800, 6220800, 0x26e1ec8b
+0, 21600, 6220800, 0x4484d484
+0, 25200, 6220800, 0x8afdb53f
+0, 28800, 6220800, 0xd3d6017a
+0, 32400, 6220800, 0xf5162af0

From 02260ccc3b7f8b88c11af4739252f5c5f97fa6e7 Mon Sep 17 00:00:00 2001
From: jan gerber <j@v2v.cc>
Date: Mon, 23 May 2011 17:22:02 +0200
Subject: [PATCH 210/830] add 5.1 to stereo downmix to resample.c this is based
 on previous 6to2channel-resample.patch from ffmpeg2theora but updated to work
 with trunk and using av_clip_int16.

---
 libavcodec/resample.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/libavcodec/resample.c b/libavcodec/resample.c
index 9e6defefdf..8c4eebe3c4 100644
--- a/libavcodec/resample.c
+++ b/libavcodec/resample.c
@@ -108,6 +108,39 @@ static void mono_to_stereo(short *output, short *input, int n1)
     }
 }
 
+/**
+ * Downmix interleaved 5.1 input [fl, fr, c, lfe, rl, rr] to stereo.
+ *
+ * - Left  = front_left  + rear_gain * rear_left  + center_gain * center
+ * - Right = front_right + rear_gain * rear_right + center_gain * center
+ * rear_gain is fixed at 0.5 and center_gain at 0.7 (-3 dB); lfe is unused.
+ * output[0] and output[1] are advanced by one sample per processed frame.
+ */
+static void surround_to_stereo(short **output, short *input, int channels, int samples)
+{
+    int i;
+    short l, r;
+
+    for (i = 0; i < samples; i++) {
+        /* input[3] is the LFE channel, which is left out of the downmix */
+        int fl = input[0];
+        int fr = input[1];
+        int c  = input[2];
+        int rl = input[4];
+        int rr = input[5];
+
+        l = av_clip_int16(fl + (0.5 * rl) + (0.7 * c));
+        r = av_clip_int16(fr + (0.5 * rr) + (0.7 * c));
+
+        /* write one sample to each output channel buffer */
+        *output[0]++ = l;
+        *output[1]++ = r;
+
+        /* advance to the next interleaved input frame */
+        input += channels;
+    }
+}
+
 static void deinterleave(short **output, short *input, int channels, int samples)
 {
     int i, j;
@@ -301,6 +334,10 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl
     } else if (s->output_channels >= 2 && s->input_channels == 1) {
         buftmp3[0] = bufout[0];
         memcpy(buftmp2[0], input, nb_samples * sizeof(short));
+    } else if (s->input_channels == 6 && s->output_channels ==2) {
+        buftmp3[0] = bufout[0];
+        buftmp3[1] = bufout[1];
+        surround_to_stereo(buftmp2, input, s->input_channels, nb_samples);
     } else if (s->output_channels >= s->input_channels && s->input_channels >= 2) {
         for (i = 0; i < s->input_channels; i++) {
             buftmp3[i] = bufout[i];
@@ -330,7 +367,8 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl
         mono_to_stereo(output, buftmp3[0], nb_samples1);
     } else if (s->output_channels == 6 && s->input_channels == 2) {
         ac3_5p1_mux(output, buftmp3[0], buftmp3[1], nb_samples1);
-    } else if (s->output_channels == s->input_channels && s->input_channels >= 2) {
+    } else if ((s->output_channels == s->input_channels && s->input_channels >= 2) ||
+               (s->output_channels == 2 && s->input_channels == 6)) {
         interleave(output, buftmp3, s->output_channels, nb_samples1);
     }
 

From bed12e24fff91db18b44811b210ed7ad4572e20c Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 23 May 2011 17:33:03 +0200
Subject: [PATCH 211/830] mpegaudio: Correct license header

To the best of my knowledge the author has not agreed to the change
from ffmpeg->libav thus i revert it.
---
 libavcodec/mpegaudiodsp_template.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/mpegaudiodsp_template.c b/libavcodec/mpegaudiodsp_template.c
index 5561c46135..02a34079c8 100644
--- a/libavcodec/mpegaudiodsp_template.c
+++ b/libavcodec/mpegaudiodsp_template.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2001, 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 

From cef7d70181ec7ee9df426ef0f3a08dd4995a4d9a Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Mon, 23 May 2011 15:56:52 -0400
Subject: [PATCH 212/830] aacdec: fix typo in scalefactor clipping check

---
 libavcodec/aacdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index fbb3582661..69aacb86d6 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -820,7 +820,7 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
                     else
                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
                     clipped_offset = av_clip(offset[1], -100, 155);
-                    if (offset[2] != clipped_offset) {
+                    if (offset[1] != clipped_offset) {
                         av_log_ask_for_sample(ac->avctx, "Noise gain clipped "
                                 "(%d -> %d).\nIf you heard an audible "
                                 "artifact, there may be a bug in the decoder. ",

From 5f3c436bdf36395974af93c815b86f439e25f36c Mon Sep 17 00:00:00 2001
From: alahuja <alahuja@alahuja-vbox-ubuntu.(none)>
Date: Mon, 23 May 2011 08:33:35 -0700
Subject: [PATCH 213/830] muxers.texi changes for mkv/webm options

---
 doc/muxers.texi | 63 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/doc/muxers.texi b/doc/muxers.texi
index 9d46803988..03be693918 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -210,4 +210,67 @@ Alternatively you can write the command as:
 ffmpeg -benchmark -i INPUT -f null -
 @end example
 
+@section matroska
+
+Matroska container muxer.
+
+This muxer implements the matroska and webm container specs.
+
+The recognized metadata settings in this muxer are:
+
+@table @option
+
+@item title=@var{title name}
+Name provided to a single track
+@end table
+
+@table @option
+
+@item language=@var{language name}
+Specifies the language of the track in the Matroska languages form
+@end table
+
+@table @option
+
+@item STEREO_MODE=@var{mode}
+Stereo 3D video layout of two views in a single video track
+@table @option
+@item mono
+video is not stereo
+@item left_right
+Both views are arranged side by side, Left-eye view is on the left
+@item bottom_top
+Both views are arranged in top-bottom orientation, Left-eye view is at bottom
+@item top_bottom
+Both views are arranged in top-bottom orientation, Left-eye view is on top
+@item checkerboard_rl
+Each view is arranged in a checkerboard interleaved pattern, Right-eye view being first
+@item checkerboard_lr
+Each view is arranged in a checkerboard interleaved pattern, Left-eye view being first
+@item row_interleaved_rl
+Each view is constituted by a row based interleaving, Right-eye view is first row
+@item row_interleaved_lr
+Each view is constituted by a row based interleaving, Left-eye view is first row
+@item col_interleaved_rl
+Both views are arranged in a column based interleaving manner, Right-eye view is first column
+@item col_interleaved_lr
+Both views are arranged in a column based interleaving manner, Left-eye view is first column
+@item anaglyph_cyan_red
+All frames are in anaglyph format viewable through red-cyan filters
+@item right_left
+Both views are arranged side by side, Right-eye view is on the left
+@item anaglyph_green_magenta
+All frames are in anaglyph format viewable through green-magenta filters
+@item block_lr
+Both eyes laced in one Block, Left-eye view is first
+@item block_rl
+Both eyes laced in one Block, Right-eye view is first
+@end table
+@end table
+
+For example a 3D WebM clip can be created using the following command line:
+@example
+ffmpeg -i sample_left_right_clip.mpg -an -vcodec libvpx -metadata STEREO_MODE=left_right -y stereo_clip.webm
+@end example
+
 @c man end MUXERS

From a7a187a1beb8551101b592bf85f0f31a0db22f61 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 15 May 2011 14:32:15 +0200
Subject: [PATCH 214/830] configure: Add -U__STRICT_ANSI__ to CPPFLAGS on
 Cygwin and DOS.

In -std=c99 mode GCC defines __STRICT_ANSI__ to hide non-ANSI interfaces.
This causes declarations for some POSIX functions to be omitted from system
headers, which causes compilation failures.
---
 configure | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configure b/configure
index b63330cbff..5e3d6fbe47 100755
--- a/configure
+++ b/configure
@@ -2458,11 +2458,13 @@ case $target_os in
         objformat="win32"
         enable dos_paths
         check_cflags -fno-common
+        add_cppflags -U__STRICT_ANSI__
         ;;
     *-dos|freedos|opendos)
         network_extralibs="-lsocket"
         objformat="coff"
         enable dos_paths
+        add_cppflags -U__STRICT_ANSI__
         ;;
     linux)
         add_cppflags -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600

From 6b4c0be5586acad3bbafd7d2dd02a8328a5ab632 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Wed, 18 May 2011 23:59:38 +0200
Subject: [PATCH 215/830] mem: define the MAX_MALLOC_SIZE constant and use it
 in place of INT_MAX

This makes changing the value of the constant simpler, since it is now
defined in only one place.
---
 libavutil/mem.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavutil/mem.c b/libavutil/mem.c
index 57dc658b97..29ecbfa055 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -65,6 +65,8 @@ void  free(void *ptr);
    memory allocator. You do not need to suppress this file because the
    linker will do it automatically. */
 
+#define MAX_MALLOC_SIZE INT_MAX
+
 void *av_malloc(size_t size)
 {
     void *ptr = NULL;
@@ -73,7 +75,7 @@ void *av_malloc(size_t size)
 #endif
 
     /* let's disallow possible ambiguous cases */
-    if(size > (INT_MAX-32) )
+    if (size > (MAX_MALLOC_SIZE-32))
         return NULL;
 
 #if CONFIG_MEMALIGN_HACK
@@ -127,7 +129,7 @@ void *av_realloc(void *ptr, size_t size)
 #endif
 
     /* let's disallow possible ambiguous cases */
-    if(size > (INT_MAX-16) )
+    if (size > (MAX_MALLOC_SIZE-16))
         return NULL;
 
 #if CONFIG_MEMALIGN_HACK

From 2ef241c09f6af4513f0558c295935708d40f2acd Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 23 May 2011 23:51:39 +0200
Subject: [PATCH 216/830] oggdec: use av_dlog()

Simplify.
---
 libavformat/oggdec.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index 7f6536545b..344bd1ccd8 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -317,9 +317,7 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
     int complete = 0;
     int segp = 0, psize = 0;
 
-#if 0
-    av_log (s, AV_LOG_DEBUG, "ogg_packet: curidx=%i\n", ogg->curidx);
-#endif
+    av_dlog(s, "ogg_packet: curidx=%i\n", ogg->curidx);
 
     do{
         idx = ogg->curidx;
@@ -332,11 +330,8 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
 
         os = ogg->streams + idx;
 
-#if 0
-        av_log (s, AV_LOG_DEBUG,
-                "ogg_packet: idx=%d pstart=%d psize=%d segp=%d nsegs=%d\n",
+        av_dlog(s, "ogg_packet: idx=%d pstart=%d psize=%d segp=%d nsegs=%d\n",
                 idx, os->pstart, os->psize, os->segp, os->nsegs);
-#endif
 
         if (!os->codec){
             if (os->header < 0){
@@ -369,11 +364,8 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
         }
     }while (!complete);
 
-#if 0
-    av_log (s, AV_LOG_DEBUG,
-            "ogg_packet: idx %i, frame size %i, start %i\n",
-            idx, os->psize, os->pstart);
-#endif
+    av_dlog(s, "ogg_packet: idx %i, frame size %i, start %i\n",
+           idx, os->psize, os->pstart);
 
     if (os->granule == -1)
         av_log(s, AV_LOG_WARNING, "Page at %"PRId64" is missing granule\n", os->page_pos);
@@ -452,9 +444,7 @@ static int ogg_get_headers(AVFormatContext *s)
             return ret;
     }while (!ogg->headers);
 
-#if 0
-    av_log (s, AV_LOG_DEBUG, "found headers\n");
-#endif
+    av_dlog(s, "found headers\n");
 
     return 0;
 }

From 4c509fe305bc79a913ef1b690df6c910c732f608 Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs <aurel@gnuage.org>
Date: Tue, 24 May 2011 01:09:24 +0200
Subject: [PATCH 217/830] matroska: cleanup handling of video stereo mode

---
 doc/muxers.texi           |   4 +-
 libavformat/matroska.c    |  24 +++++++++
 libavformat/matroska.h    |  23 ++-------
 libavformat/matroskadec.c | 101 +++++++-------------------------------
 libavformat/matroskaenc.c |  66 ++++++-------------------
 5 files changed, 66 insertions(+), 152 deletions(-)

diff --git a/doc/muxers.texi b/doc/muxers.texi
index 03be693918..55b44d1018 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -232,7 +232,7 @@ Specifies the language of the track in the Matroska languages form
 
 @table @option
 
-@item STEREO_MODE=@var{mode}
+@item stereo_mode=@var{mode}
 Stereo 3D video layout of two views in a single video track
 @table @option
 @item mono
@@ -270,7 +270,7 @@ Both eyes laced in one Block, Right-eye view is first
 
 For example a 3D WebM clip can be created using the following command line:
 @example
-ffmpeg -i sample_left_right_clip.mpg -an -vcodec libvpx -metadata STEREO_MODE=left_right -y stereo_clip.webm
+ffmpeg -i sample_left_right_clip.mpg -an -vcodec libvpx -metadata stereo_mode=left_right -y stereo_clip.webm
 @end example
 
 @c man end MUXERS
diff --git a/libavformat/matroska.c b/libavformat/matroska.c
index c7e9663316..fe9b0424a3 100644
--- a/libavformat/matroska.c
+++ b/libavformat/matroska.c
@@ -99,3 +99,27 @@ const AVMetadataConv ff_mkv_metadata_conv[] = {
     { "PART_NUMBER"   , "track"  },
     { 0 }
 };
+
+const char const *matroska_video_stereo_mode[] = { /* indexed by the Matroska StereoMode value */
+    "mono",
+    "left_right",
+    "bottom_top",
+    "top_bottom",
+    "checkerboard_rl",
+    "checkerboard_lr",
+    "row_interleaved_rl",
+    "row_interleaved_lr",
+    "col_interleaved_rl",
+    "col_interleaved_lr",
+    "anaglyph_cyan_red",
+    "right_left",
+    "anaglyph_green_magenta",
+    "block_lr",
+    "block_rl",
+};
+
+const char const *matroska_video_stereo_plane[] = {
+    "left",
+    "right",
+    "background",
+};
diff --git a/libavformat/matroska.h b/libavformat/matroska.h
index 48959772f1..949195b757 100644
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h
@@ -223,24 +223,6 @@ typedef enum {
   MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP = 3,
 } MatroskaTrackEncodingCompAlgo;
 
-typedef enum {
-  MATROSKA_VIDEO_STEREOMODE_MONO               = 0,
-  MATROSKA_VIDEO_STEREOMODE_LEFT_RIGHT         = 1,
-  MATROSKA_VIDEO_STEREOMODE_BOTTOM_TOP         = 2,
-  MATROSKA_VIDEO_STEREOMODE_TOP_BOTTOM         = 3,
-  MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_RL    = 4,
-  MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_LR    = 5,
-  MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_RL = 6,
-  MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_LR = 7,
-  MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_RL = 8,
-  MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_LR = 9,
-  MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_CYAN_RED  = 10,
-  MATROSKA_VIDEO_STEREOMODE_RIGHT_LEFT         = 11,
-  MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_GREEN_MAG = 12,
-  MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_LR = 13,
-  MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_RL = 14,
-} MatroskaVideoStereoModeType;
-
 /*
  * Matroska Codec IDs, strings
  */
@@ -261,5 +243,10 @@ typedef struct CodecMime{
 extern const CodecTags ff_mkv_codec_tags[];
 extern const CodecMime ff_mkv_mime_tags[];
 extern const AVMetadataConv ff_mkv_metadata_conv[];
+extern const char const *matroska_video_stereo_mode[];
+extern const char const *matroska_video_stereo_plane[];
+
+#define MATROSKA_VIDEO_STEREO_MODE_COUNT  15
+#define MATROSKA_VIDEO_STEREO_PLANE_COUNT  3
 
 #endif /* AVFORMAT_MATROSKA_H */
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 90623bf89f..eca32773fe 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -112,7 +112,7 @@ typedef struct {
     uint64_t pixel_width;
     uint64_t pixel_height;
     uint64_t fourcc;
-    uint64_t stereoMode;
+    uint64_t stereo_mode;
 } MatroskaTrackVideo;
 
 typedef struct {
@@ -139,7 +139,6 @@ typedef struct {
 
 typedef struct {
     EbmlList combine_planes;
-    /*EbmlList join_blocks;*/
 } MatroskaTrackOperation;
 
 typedef struct {
@@ -303,7 +302,7 @@ static EbmlSyntax matroska_track_video[] = {
     { MATROSKA_ID_VIDEOPIXELWIDTH,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
     { MATROSKA_ID_VIDEOPIXELHEIGHT,   EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
     { MATROSKA_ID_VIDEOCOLORSPACE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) },
-    { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_UINT, MATROSKA_VIDEO_STEREOMODE_MONO, offsetof(MatroskaTrackVideo,stereoMode) },
+    { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,stereo_mode) },
     { MATROSKA_ID_VIDEOPIXELCROPB,    EBML_NONE },
     { MATROSKA_ID_VIDEOPIXELCROPT,    EBML_NONE },
     { MATROSKA_ID_VIDEOPIXELCROPL,    EBML_NONE },
@@ -1225,16 +1224,13 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
     EbmlList *chapters_list = &matroska->chapters;
     MatroskaChapter *chapters;
     MatroskaTrack *tracks;
-    EbmlList *combined_list;
-    MatroskaTrackPlane *planes;
-    char stereo_str[256];
     EbmlList *index_list;
     MatroskaIndex *index;
     int index_scale = 1;
     uint64_t max_start = 0;
     Ebml ebml = { 0 };
     AVStream *st;
-    int i, j, res;
+    int i, j, k, res;
 
     matroska->ctx = s;
 
@@ -1499,6 +1495,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
         }
 
         if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
+            MatroskaTrackPlane *planes = track->operation.combine_planes.elem;
+
             st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
             st->codec->codec_tag  = track->video.fourcc;
             st->codec->width  = track->video.pixel_width;
@@ -1513,84 +1511,23 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
             if (track->default_duration)
                 st->avg_frame_rate = av_d2q(1000000000.0/track->default_duration, INT_MAX);
 
-            /* restore stereo mode flag as metadata tag */
-            switch (track->video.stereoMode) {
-                case MATROSKA_VIDEO_STEREOMODE_LEFT_RIGHT:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "left_right", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_BOTTOM_TOP:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "bottom_top", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_TOP_BOTTOM:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "top_bottom", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_RL:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "checkerboard_rl", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_LR:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "checkerboard_lr", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_RL:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "row_interleaved_rl", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_LR:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "row_interleaved_lr", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_RL:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "col_interleaved_rl", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_LR:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "col_interleaved_lr", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_CYAN_RED:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "anaglyph_cyan_red", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_RIGHT_LEFT:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "right_left", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_GREEN_MAG:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "anaglyph_green_magenta", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_LR:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "block_lr", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_RL:
-                    av_metadata_set2(&st->metadata, "STEREO_MODE", "block_rl", 0);
-                    break;
-                case MATROSKA_VIDEO_STEREOMODE_MONO:
-                default:
-                    /**av_metadata_set2(&st->metadata, "STEREO_MODE", "mono", 0);*/
-                    break;
-            }
+            /* export stereo mode flag as metadata tag */
+            if (track->video.stereo_mode && track->video.stereo_mode < MATROSKA_VIDEO_STEREO_MODE_COUNT)
+                av_metadata_set2(&st->metadata, "stereo_mode", matroska_video_stereo_mode[track->video.stereo_mode], 0);
 
-            /* if we have virtual track - mark the real tracks */
-            combined_list = &track->operation.combine_planes;
-            planes = combined_list->elem;
-            for (int plane_id = 0; plane_id < combined_list->nb_elem; ++plane_id) {
-                switch (planes[plane_id].type) {
-                    case 0: {
-                        snprintf(stereo_str, sizeof(stereo_str), "left_%d", i);
+            /* if this is a virtual track, tag each real track it combines as "<plane>_<track index>" */
+            for (j=0; j < track->operation.combine_planes.nb_elem; j++) {
+                char buf[32];
+                if (planes[j].type >= MATROSKA_VIDEO_STEREO_PLANE_COUNT) /* no name for this plane type */
+                    continue;
+                snprintf(buf, sizeof(buf), "%s_%d",
+                         matroska_video_stereo_plane[planes[j].type], i);
+                for (k=0; k < matroska->tracks.nb_elem; k++)
+                    if (planes[j].uid == tracks[k].uid) {
+                        av_metadata_set2(&s->streams[k]->metadata,
+                                         "stereo_mode", buf, 0);
                         break;
                     }
-                    case 1: {
-                        snprintf(stereo_str, sizeof(stereo_str), "right_%d", i);
-                        break;
-                    }
-                    case 2: {
-                        snprintf(stereo_str, sizeof(stereo_str), "background_%d", i);
-                        break;
-                    }
-                    default: {
-                        continue;
-                    }
-                }
-                for (int track_id = 0; track_id < matroska->tracks.nb_elem && track_id < i; ++track_id) {
-                    MatroskaTrack *check_track = &tracks[track_id];
-                    if (planes[plane_id].uid == check_track->uid) {
-                        av_metadata_set2(&s->streams[track_id]->metadata, "STEREO_MODE", stereo_str, 0);
-                        break;
-                    }
-                }
             }
         } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
             st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 0fe760b684..c3e203cb36 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -587,58 +587,24 @@ static int mkv_write_tracks(AVFormatContext *s)
                 put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELWIDTH , codec->width);
                 put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELHEIGHT, codec->height);
 
-                if ((tag = av_metadata_get(st->metadata, "STEREO_MODE", NULL, 0)) ||
-                    (tag = av_metadata_get( s->metadata, "STEREO_MODE", NULL, 0))) {
-                    // save stereomode flag
-                    uint64_t stereo_fmt = -1;
-                    int valid_fmt = 0;
+                if ((tag = av_metadata_get(st->metadata, "stereo_mode", NULL, 0)) ||
+                    (tag = av_metadata_get( s->metadata, "stereo_mode", NULL, 0))) {
+                    // save stereo mode flag
+                    uint64_t st_mode = MATROSKA_VIDEO_STEREO_MODE_COUNT;
 
-                    if (!strcmp(tag->value, "mono")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_MONO;
-                    } else if (!strcmp(tag->value, "left_right")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_LEFT_RIGHT;
-                    } else if (!strcmp(tag->value, "bottom_top")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_BOTTOM_TOP;
-                    } else if (!strcmp(tag->value, "top_bottom")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_TOP_BOTTOM;
-                    } else if (!strcmp(tag->value, "checkerboard_rl")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_RL;
-                    } else if (!strcmp(tag->value, "checkerboard_lr")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_CHECKERBOARD_LR;
-                    } else if (!strcmp(tag->value, "row_interleaved_rl")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_RL;
-                    } else if (!strcmp(tag->value, "row_interleaved_lr")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_ROW_INTERLEAVED_LR;
-                    } else if (!strcmp(tag->value, "col_interleaved_rl")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_RL;
-                    } else if (!strcmp(tag->value, "col_interleaved_lr")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_COL_INTERLEAVED_LR;
-                    } else if (!strcmp(tag->value, "anaglyph_cyan_red")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_CYAN_RED;
-                    } else if (!strcmp(tag->value, "right_left")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_RIGHT_LEFT;
-                    } else if (!strcmp(tag->value, "anaglyph_green_magenta")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_ANAGLYPH_GREEN_MAG;
-                    } else if (!strcmp(tag->value, "block_lr")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_LR;
-                    } else if (!strcmp(tag->value, "block_rl")) {
-                        stereo_fmt = MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_RL;
-                    }
-
-                    switch (mkv->mode) {
-                        case MODE_WEBM:
-                            if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_TOP_BOTTOM
-                             || stereo_fmt == MATROSKA_VIDEO_STEREOMODE_RIGHT_LEFT)
-                            valid_fmt = 1;
+                    for (j=0; j<MATROSKA_VIDEO_STEREO_MODE_COUNT; j++)
+                        if (!strcmp(tag->value, matroska_video_stereo_mode[j])){
+                            st_mode = j;
                             break;
-                        case MODE_MATROSKAv2:
-                            if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_BOTH_EYES_BLOCK_RL)
-                                valid_fmt = 1;
-                            break;
-                    }
+                        }
 
-                    if (valid_fmt)
-                        put_ebml_uint (pb, MATROSKA_ID_VIDEOSTEREOMODE, stereo_fmt);
+                    if ((mkv->mode == MODE_WEBM && st_mode > 3 && st_mode != 11)
+                        || st_mode >= MATROSKA_VIDEO_STEREO_MODE_COUNT) {
+                        av_log(s, AV_LOG_ERROR,
+                               "The specified stereo mode is not valid.\n");
+                        return AVERROR(EINVAL);
+                    } else
+                        put_ebml_uint(pb, MATROSKA_ID_VIDEOSTEREOMODE, st_mode);
                 }
 
                 if (st->sample_aspect_ratio.num) {
@@ -786,7 +752,7 @@ static int mkv_write_tag(AVFormatContext *s, AVMetadata *m, unsigned int element
     end_ebml_master(s->pb, targets);
 
     while ((t = av_metadata_get(m, "", t, AV_METADATA_IGNORE_SUFFIX)))
-        if (strcasecmp(t->key, "title"))
+        if (strcasecmp(t->key, "title") && strcasecmp(t->key, "stereo_mode"))
             mkv_write_simpletag(s->pb, t);
 
     end_ebml_master(s->pb, tag);

From 83654c7b1b598add9041c7add6b77478eb91177f Mon Sep 17 00:00:00 2001
From: Kamil Nowosad <k.nowosad@students.mimuw.edu.pl>
Date: Mon, 23 May 2011 23:13:34 +0200
Subject: [PATCH 218/830] Add Kamil Nowosads j2k code.

This needs work but it should not rot in soc svn.
---
 doc/general.texi       |    3 +-
 libavcodec/Makefile    |    2 +
 libavcodec/allcodecs.c |    1 +
 libavcodec/j2k.c       |  390 +++++++++++++++
 libavcodec/j2k.h       |  234 +++++++++
 libavcodec/j2k_dwt.c   |  384 +++++++++++++++
 libavcodec/j2k_dwt.h   |   63 +++
 libavcodec/j2kdec.c    | 1053 ++++++++++++++++++++++++++++++++++++++++
 libavcodec/j2kenc.c    | 1045 +++++++++++++++++++++++++++++++++++++++
 libavcodec/mqc.c       |  108 +++++
 libavcodec/mqc.h       |   75 +++
 libavcodec/mqcdec.c    |   93 ++++
 libavcodec/mqcenc.c    |  119 +++++
 libavformat/img2.c     |    1 +
 libavformat/riff.c     |    1 +
 15 files changed, 3570 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/j2k.c
 create mode 100644 libavcodec/j2k.h
 create mode 100644 libavcodec/j2k_dwt.c
 create mode 100644 libavcodec/j2k_dwt.h
 create mode 100644 libavcodec/j2kdec.c
 create mode 100644 libavcodec/j2kenc.c
 create mode 100644 libavcodec/mqc.c
 create mode 100644 libavcodec/mqc.h
 create mode 100644 libavcodec/mqcdec.c
 create mode 100644 libavcodec/mqcenc.c

diff --git a/doc/general.texi b/doc/general.texi
index f9787139a4..8f0085d044 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -284,8 +284,7 @@ following image formats are supported:
     @tab Digital Picture Exchange
 @item JPEG         @tab X @tab X
     @tab Progressive JPEG is not supported.
-@item JPEG 2000    @tab   @tab E
-    @tab decoding supported through external library libopenjpeg
+@item JPEG 2000    @tab X @tab X
 @item JPEG-LS      @tab X @tab X
 @item LJPEG        @tab X @tab
     @tab Lossless JPEG
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index db184bcdce..53dd19140e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -192,6 +192,8 @@ OBJS-$(CONFIG_INDEO3_DECODER)          += indeo3.o
 OBJS-$(CONFIG_INDEO5_DECODER)          += indeo5.o ivi_common.o ivi_dsp.o
 OBJS-$(CONFIG_INTERPLAY_DPCM_DECODER)  += dpcm.o
 OBJS-$(CONFIG_INTERPLAY_VIDEO_DECODER) += interplayvideo.o
+OBJS-$(CONFIG_JPEG2000_DECODER)        += j2kdec.o mqcdec.o mqc.o j2k.o j2k_dwt.o
+#OBJS-$(CONFIG_JPEG2000_ENCODER)        += j2kenc.o mqcenc.o mqc.o j2k.o dwt.o
 OBJS-$(CONFIG_JPEGLS_DECODER)          += jpeglsdec.o jpegls.o \
                                           mjpegdec.o mjpeg.o
 OBJS-$(CONFIG_JPEGLS_ENCODER)          += jpeglsenc.o jpegls.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index ff032dda85..1ec20106e7 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -131,6 +131,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER (INDEO3, indeo3);
     REGISTER_DECODER (INDEO5, indeo5);
     REGISTER_DECODER (INTERPLAY_VIDEO, interplay_video);
+    REGISTER_DECODER (JPEG2000, jpeg2000);
     REGISTER_ENCDEC  (JPEGLS, jpegls);
     REGISTER_DECODER (JV, jv);
     REGISTER_DECODER (KGV1, kgv1);
diff --git a/libavcodec/j2k.c b/libavcodec/j2k.c
new file mode 100644
index 0000000000..3cf87bc9fa
--- /dev/null
+++ b/libavcodec/j2k.c
@@ -0,0 +1,390 @@
+/*
+ * JPEG2000 encoder and decoder common functions
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * JPEG2000 image encoder and decoder common functions
+ * @file
+ * @author Kamil Nowosad
+ */
+
+
+#include "avcodec.h"
+#include "j2k.h"
+
+#define SHL(a, n) ((n)>=0 ? (a) << (n) : (a) >> -(n))
+
+#if 0
+void ff_j2k_printv(int *tab, int l)
+{
+    int i;
+    for (i = 0; i < l; i++)
+        printf("%.3d ", tab[i]);
+    printf("\n");
+}
+
+void ff_j2k_printu(uint8_t *tab, int l)
+{
+    int i;
+    for (i = 0; i < l; i++)
+        printf("%.3hd ", tab[i]);
+    printf("\n");
+}
+#endif
+
+/* tag tree routines */
+
+/** compute the node count of a tag tree and allocate the tree */
+
+static int tag_tree_size(int w, int h)
+{
+    /* number of nodes in a tag tree with w x h leaves: each level is
+     * half the size (rounded up) of the one below, down to a single root */
+    int total = 1;
+
+    for (; w > 1 || h > 1; w = (w+1) >> 1, h = (h+1) >> 1)
+        total += w * h;
+    return total;
+}
+
+J2kTgtNode *ff_j2k_tag_tree_init(int w, int h)
+{
+    /* Build a tag tree over a w x h grid of leaves; the levels are stored
+     * back to back, finest first, and every node links to its parent in
+     * the next (half-resolution) level. Returns NULL if allocation fails. */
+    J2kTgtNode *root, *level, *next;
+
+    root = av_mallocz(tag_tree_size(w, h)*sizeof(J2kTgtNode));
+    if (!root)
+        return NULL;
+
+    for (level = root; w > 1 || h > 1; level = next){
+        int row, col;
+        const int cur_w = w, cur_h = h;
+
+        /* dimensions of the parent level, rounded up */
+        w = (cur_w+1) >> 1;
+        h = (cur_h+1) >> 1;
+        next = level + cur_w*cur_h;
+
+        for (row = 0; row < cur_h; row++)
+            for (col = 0; col < cur_w; col++)
+                level[row*cur_w + col].parent = &next[(row>>1)*w + (col>>1)];
+    }
+    /* the single node left over is the root */
+    level[0].parent = NULL;
+    return root;
+}
+
+static void tag_tree_zero(J2kTgtNode *t, int w, int h)
+{
+    /* clear val and vis of every node; the parent links stay intact */
+    J2kTgtNode *const end = t + tag_tree_size(w, h);
+    for (; t < end; t++){
+        t->val = 0;
+        t->vis = 0;
+    }
+}
+
+uint8_t ff_j2k_nbctxno_lut[256][4];
+
+static int getnbctxno(int flag, int bandno)
+{
+    /*
+     * Map the eight neighbour-significance bits of flag to a context
+     * label in 0..8.
+     *   h, v - number of significant horizontal / vertical neighbours (0..2)
+     *   d    - number of significant diagonal neighbours (0..4)
+     * Bands below 3 rank h first, then v, then d (band 1 with h and v
+     * swapped); any other band ranks d first, then h+v.
+     */
+    int h = !!(flag & J2K_T1_SIG_E) + !!(flag & J2K_T1_SIG_W);
+    int v = !!(flag & J2K_T1_SIG_N) + !!(flag & J2K_T1_SIG_S);
+    int d = !!(flag & J2K_T1_SIG_NE) + !!(flag & J2K_T1_SIG_NW) +
+            !!(flag & J2K_T1_SIG_SE) + !!(flag & J2K_T1_SIG_SW);
+
+    if (bandno >= 3){
+        const int hv = h + v;
+
+        switch (d){
+        case 0:
+            return FFMIN(hv, 2);
+        case 1:
+            return 3 + FFMIN(hv, 2);
+        case 2:
+            return hv ? 7 : 6;
+        default:
+            return 8;
+        }
+    }
+
+    if (bandno == 1)
+        FFSWAP(int, h, v);
+
+    if (h == 2)
+        return 8;
+    if (h == 1)
+        return v ? 7 : d ? 6 : 5;
+
+    /* h == 0 from here on: v decides, then d */
+    if (v)
+        return v + 2;
+    return FFMIN(d, 2);
+}
+
+uint8_t ff_j2k_sgnctxno_lut[16][16], ff_j2k_xorbit_lut[16][16];
+
+static int getsgnctxno(int flag, uint8_t *xorbit)
+{
+    /* neighbour state used as table index: 0 insignificant, 1 sign bit set, 2 sign bit clear */
+    static const int contribtab[3][3] = {{0, -1, 1}, {-1, -1, 0}, {1, 0, 1}};
+    static const int ctxlbltab[3][3] = {{13, 12, 11}, {10, 9, 10}, {11, 12, 13}};
+    static const int xorbittab[3][3] = {{1, 1, 1,}, {1, 0, 0}, {0, 0, 0}};
+    const int e = !(flag & J2K_T1_SIG_E) ? 0 : flag & J2K_T1_SGN_E ? 1 : 2;
+    const int w = !(flag & J2K_T1_SIG_W) ? 0 : flag & J2K_T1_SGN_W ? 1 : 2;
+    const int s = !(flag & J2K_T1_SIG_S) ? 0 : flag & J2K_T1_SGN_S ? 1 : 2;
+    const int n = !(flag & J2K_T1_SIG_N) ? 0 : flag & J2K_T1_SGN_N ? 1 : 2;
+    const int hcontrib = contribtab[e][w] + 1, vcontrib = contribtab[s][n] + 1;
+    *xorbit = xorbittab[hcontrib][vcontrib];
+    return ctxlbltab[hcontrib][vcontrib];
+}
+
+void ff_j2k_init_tier1_luts(void) /* (void): a real prototype, matching the declaration in j2k.h */
+{ /* precompute the context tables that the ff_j2k_get*ctxno() inline helpers index */
+    int i, j;
+    for (i = 0; i < 256; i++) /* i: the 8 neighbour-significance bits, j: band number */
+        for (j = 0; j < 4; j++)
+            ff_j2k_nbctxno_lut[i][j] = getnbctxno(i, j);
+    for (i = 0; i < 16; i++) /* i: flag bits 0-3 (N/E/W/S significance), j: flag bits 8-11 (signs) */
+        for (j = 0; j < 16; j++)
+            ff_j2k_sgnctxno_lut[i][j] = getsgnctxno(i + (j << 8), &ff_j2k_xorbit_lut[i][j]);
+}
+
+void ff_j2k_set_significant(J2kT1Context *t1, int x, int y, int negative)
+{
+    /* flags[][] is declared two entries larger than data[][] per dimension; shift into it */
+    const int cx = x + 1, cy = y + 1;
+    /* a negative coefficient additionally publishes its sign to the four direct neighbours */
+    const int sw = negative ? J2K_T1_SGN_W : 0, se = negative ? J2K_T1_SGN_E : 0;
+    const int sn = negative ? J2K_T1_SGN_N : 0, ss = negative ? J2K_T1_SGN_S : 0;
+
+    t1->flags[cy][cx] |= J2K_T1_SIG;
+
+    t1->flags[cy][cx+1] |= J2K_T1_SIG_W | sw;
+    t1->flags[cy][cx-1] |= J2K_T1_SIG_E | se;
+    t1->flags[cy+1][cx] |= J2K_T1_SIG_N | sn;
+    t1->flags[cy-1][cx] |= J2K_T1_SIG_S | ss;
+
+    t1->flags[cy+1][cx+1] |= J2K_T1_SIG_NW;
+    t1->flags[cy+1][cx-1] |= J2K_T1_SIG_NE;
+    t1->flags[cy-1][cx+1] |= J2K_T1_SIG_SW;
+    t1->flags[cy-1][cx-1] |= J2K_T1_SIG_SE;
+}
+
+/**
+ * Set up the resolution levels, bands, code blocks and precincts of one
+ * component and allocate its coefficient buffer and DWT context.
+ * @param cbps   bit count added to the band gain for the quantization step size
+ * @param dx,dy  divisors applied to the band coordinates and code-block counts
+ * @return 0 on success, otherwise the ff_j2k_dwt_init() error or
+ *         AVERROR(ENOMEM); earlier allocations are not released on failure
+ */
+int ff_j2k_init_component(J2kComponent *comp, J2kCodingStyle *codsty, J2kQuantStyle *qntsty, int cbps, int dx, int dy)
+{
+    int reslevelno, bandno, gbandno = 0, ret, i, j, csize = 1;
+
+    if ((ret = ff_j2k_dwt_init(&comp->dwt, comp->coord, codsty->nreslevels-1, codsty->transform)))
+        return ret;
+    for (i = 0; i < 2; i++)
+        csize *= comp->coord[i][1] - comp->coord[i][0];
+
+    comp->data = av_malloc(csize * sizeof(int));
+    if (!comp->data)
+        return AVERROR(ENOMEM);
+    comp->reslevel = av_malloc(codsty->nreslevels * sizeof(J2kResLevel));
+    if (!comp->reslevel)
+        return AVERROR(ENOMEM);
+    for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){
+        int declvl = codsty->nreslevels - reslevelno;
+        J2kResLevel *reslevel = comp->reslevel + reslevelno;
+
+        for (i = 0; i < 2; i++)
+            for (j = 0; j < 2; j++)
+                reslevel->coord[i][j] =
+                    ff_j2k_ceildivpow2(comp->coord[i][j], declvl - 1);
+
+        reslevel->nbands = reslevelno == 0 ? 1 : 3;
+
+        if (reslevel->coord[0][1] == reslevel->coord[0][0])
+            reslevel->num_precincts_x = 0;
+        else
+            reslevel->num_precincts_x = ff_j2k_ceildivpow2(reslevel->coord[0][1], codsty->log2_prec_width)
+                                        - (reslevel->coord[0][0] >> codsty->log2_prec_width);
+
+        if (reslevel->coord[1][1] == reslevel->coord[1][0])
+            reslevel->num_precincts_y = 0;
+        else
+            reslevel->num_precincts_y = ff_j2k_ceildivpow2(reslevel->coord[1][1], codsty->log2_prec_height)
+                                        - (reslevel->coord[1][0] >> codsty->log2_prec_height);
+
+        reslevel->band = av_malloc(reslevel->nbands * sizeof(J2kBand));
+        if (!reslevel->band)
+            return AVERROR(ENOMEM);
+        for (bandno = 0; bandno < reslevel->nbands; bandno++, gbandno++){
+            J2kBand *band = reslevel->band + bandno;
+            int cblkno, precx, precy, precno;
+            int x0, y0, x1, y1;
+            int xi0, yi0, xi1, yi1;
+            int cblkperprecw, cblkperprech;
+
+            if (qntsty->quantsty != J2K_QSTY_NONE){
+                const static uint8_t lut_gain[2][4] = {{0, 0, 0, 0}, {0, 1, 1, 2}};
+                /* gain index is bandno + (reslevelno > 0); unparenthesized, '+' would bind first */
+                int numbps = cbps + lut_gain[codsty->transform][bandno + (reslevelno > 0)];
+                band->stepsize = SHL(2048 + qntsty->mant[gbandno], 2 + numbps - qntsty->expn[gbandno]);
+            } else
+                band->stepsize = 1 << 13;
+
+            if (reslevelno == 0){  // the same everywhere
+                band->codeblock_width = 1 << FFMIN(codsty->log2_cblk_width, codsty->log2_prec_width-1);
+                band->codeblock_height = 1 << FFMIN(codsty->log2_cblk_height, codsty->log2_prec_height-1);
+                for (i = 0; i < 2; i++)
+                    for (j = 0; j < 2; j++)
+                        band->coord[i][j] = ff_j2k_ceildivpow2(comp->coord[i][j], declvl-1);
+            } else{
+                band->codeblock_width = 1 << FFMIN(codsty->log2_cblk_width, codsty->log2_prec_width);
+                band->codeblock_height = 1 << FFMIN(codsty->log2_cblk_height, codsty->log2_prec_height);
+                for (i = 0; i < 2; i++)
+                    for (j = 0; j < 2; j++)
+                        band->coord[i][j] = ff_j2k_ceildivpow2(comp->coord[i][j] - (((bandno+1>>i)&1) << declvl-1), declvl);
+            }
+            band->cblknx = ff_j2k_ceildiv(band->coord[0][1], band->codeblock_width)  - band->coord[0][0] / band->codeblock_width;
+            band->cblkny = ff_j2k_ceildiv(band->coord[1][1], band->codeblock_height) - band->coord[1][0] / band->codeblock_height;
+
+            for (j = 0; j < 2; j++)
+                band->coord[0][j] = ff_j2k_ceildiv(band->coord[0][j], dx);
+            for (j = 0; j < 2; j++)
+                band->coord[1][j] = ff_j2k_ceildiv(band->coord[1][j], dy);
+            band->cblknx = ff_j2k_ceildiv(band->cblknx, dx);
+            band->cblkny = ff_j2k_ceildiv(band->cblkny, dy);
+
+            band->cblk = av_malloc(band->cblknx * band->cblkny * sizeof(J2kCblk));
+            if (!band->cblk)
+                return AVERROR(ENOMEM);
+            band->prec = av_malloc(reslevel->num_precincts_x * reslevel->num_precincts_y * sizeof(J2kPrec));
+            if (!band->prec)
+                return AVERROR(ENOMEM);
+
+            for (cblkno = 0; cblkno < band->cblknx * band->cblkny; cblkno++){
+                J2kCblk *cblk = band->cblk + cblkno;
+                cblk->zero = 0;
+                cblk->lblock = 3;
+                cblk->length = 0;
+                cblk->lengthinc = 0;
+                cblk->npasses = 0;
+            }
+
+            y0 = band->coord[1][0];
+            y1 = ((band->coord[1][0] + (1<<codsty->log2_prec_height)) & ~((1<<codsty->log2_prec_height)-1)) - y0;
+            yi0 = 0;
+            yi1 = ff_j2k_ceildivpow2(y1 - y0, codsty->log2_cblk_height) << codsty->log2_cblk_height;
+            yi1 = FFMIN(yi1, band->cblkny);
+            cblkperprech = 1<<(codsty->log2_prec_height - codsty->log2_cblk_height);
+            for (precy = 0, precno = 0; precy < reslevel->num_precincts_y; precy++){
+                for (precx = 0; precx < reslevel->num_precincts_x; precx++, precno++){
+                    band->prec[precno].yi0 = yi0;
+                    band->prec[precno].yi1 = yi1;
+                }
+                yi1 += cblkperprech;
+                yi0 = yi1 - cblkperprech;
+                yi1 = FFMIN(yi1, band->cblkny);
+            }
+            x0 = band->coord[0][0];
+            x1 = ((band->coord[0][0] + (1<<codsty->log2_prec_width)) & ~((1<<codsty->log2_prec_width)-1)) - x0;
+            xi0 = 0;
+            xi1 = ff_j2k_ceildivpow2(x1 - x0, codsty->log2_cblk_width) << codsty->log2_cblk_width;
+            xi1 = FFMIN(xi1, band->cblknx);
+            cblkperprecw = 1<<(codsty->log2_prec_width - codsty->log2_cblk_width);
+            /* index precincts row-major, like the y pass above, so each one gets its own x range and tag trees */
+            for (precx = 0; precx < reslevel->num_precincts_x; precx++){
+                for (precy = 0, precno = precx; precy < reslevel->num_precincts_y; precy++, precno += reslevel->num_precincts_x){
+                    J2kPrec *prec = band->prec + precno;
+                    prec->xi0 = xi0;
+                    prec->xi1 = xi1;
+                    prec->cblkincl = ff_j2k_tag_tree_init(prec->xi1 - prec->xi0,
+                                                          prec->yi1 - prec->yi0);
+                    prec->zerobits = ff_j2k_tag_tree_init(prec->xi1 - prec->xi0,
+                                                          prec->yi1 - prec->yi0);
+                    if (!prec->cblkincl || !prec->zerobits)
+                        return AVERROR(ENOMEM);
+                }
+                xi1 += cblkperprecw;
+                xi0 = xi1 - cblkperprecw;
+                xi1 = FFMIN(xi1, band->cblknx);
+            }
+        }
+    }
+    return 0;
+}
+
+void ff_j2k_reinit(J2kComponent *comp, J2kCodingStyle *codsty)
+{
+    int lvl, b, n; /* clears tag trees and code-block lengths; the layout is kept */
+    for (lvl = 0; lvl < codsty->nreslevels; lvl++){
+        J2kResLevel *rl = &comp->reslevel[lvl];
+        const int nb_prec = rl->num_precincts_x * rl->num_precincts_y;
+        for (b = 0; b < rl->nbands; b++){
+            J2kBand *bd = &rl->band[b];
+            for (n = 0; n < nb_prec; n++){
+                J2kPrec *pr = &bd->prec[n];
+                tag_tree_zero(pr->zerobits, pr->xi1 - pr->xi0, pr->yi1 - pr->yi0);
+                tag_tree_zero(pr->cblkincl, pr->xi1 - pr->xi0, pr->yi1 - pr->yi0);
+            }
+            for (n = 0; n < bd->cblknx * bd->cblkny; n++){
+                bd->cblk[n].length = 0;
+                bd->cblk[n].lblock = 3;
+            }
+        }
+    }
+}
+
+void ff_j2k_cleanup(J2kComponent *comp, J2kCodingStyle *codsty)
+{
+    /* free the tag trees, precincts, code blocks, bands, levels, DWT state and data of comp */
+    int lvl, b, n;
+    for (lvl = 0; lvl < codsty->nreslevels; lvl++){
+        J2kResLevel *rl = &comp->reslevel[lvl];
+        const int nb_prec = rl->num_precincts_x * rl->num_precincts_y;
+        for (b = 0; b < rl->nbands; b++){
+            J2kBand *bd = &rl->band[b];
+            for (n = 0; n < nb_prec; n++){
+                av_freep(&bd->prec[n].zerobits);
+                av_freep(&bd->prec[n].cblkincl);
+            }
+            av_freep(&bd->cblk);
+            av_freep(&bd->prec);
+        }
+        av_freep(&rl->band);
+    }
+
+    ff_j2k_dwt_destroy(&comp->dwt);
+    av_freep(&comp->reslevel);
+    av_freep(&comp->data);
+}
diff --git a/libavcodec/j2k.h b/libavcodec/j2k.h
new file mode 100644
index 0000000000..3a41b5381c
--- /dev/null
+++ b/libavcodec/j2k.h
@@ -0,0 +1,234 @@
+/*
+ * JPEG2000 tables
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_J2K_H
+#define AVCODEC_J2K_H
+
+/**
+ * JPEG2000 tables
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include "mqc.h"
+#include "j2k_dwt.h"
+
+enum J2kMarkers{
+    J2K_SOC = 0xff4f, ///< start of codestream
+    J2K_SIZ = 0xff51, ///< image and tile size
+    J2K_COD,          ///< coding style default
+    J2K_COC,          ///< coding style component
+    J2K_TLM = 0xff55, ///< tile-part lengths, main header
+    J2K_PLM = 0xff57, ///< packet length, main header
+    J2K_PLT,          ///< packet length, tile-part header
+    J2K_QCD = 0xff5c, ///< quantization default
+    J2K_QCC,          ///< quantization component
+    J2K_RGN,          ///< region of interest
+    J2K_POC,          ///< progression order change
+    J2K_PPM,          ///< packed packet headers, main header
+    J2K_PPT,          ///< packed packet headers, tile-part header
+    J2K_CRG = 0xff63, ///< component registration
+    J2K_COM,          ///< comment
+    J2K_SOT = 0xff90, ///< start of tile-part
+    J2K_SOP,          ///< start of packet
+    J2K_EPH,          ///< end of packet header
+    J2K_SOD,          ///< start of data
+    J2K_EOC = 0xffd9, ///< end of codestream
+};
+
+enum J2kQuantsty{ ///< quantization style
+    J2K_QSTY_NONE, ///< no quantization
+    J2K_QSTY_SI,   ///< scalar derived
+    J2K_QSTY_SE    ///< scalar expounded
+};
+
+#define J2K_MAX_CBLKW 64
+#define J2K_MAX_CBLKH 64
+
+// T1 flags
+// flags determining significance of neighbour coefficients
+#define J2K_T1_SIG_N  0x0001
+#define J2K_T1_SIG_E  0x0002
+#define J2K_T1_SIG_W  0x0004
+#define J2K_T1_SIG_S  0x0008
+#define J2K_T1_SIG_NE 0x0010
+#define J2K_T1_SIG_NW 0x0020
+#define J2K_T1_SIG_SE 0x0040
+#define J2K_T1_SIG_SW 0x0080
+#define J2K_T1_SIG_NB (J2K_T1_SIG_N | J2K_T1_SIG_E | J2K_T1_SIG_S | J2K_T1_SIG_W \
+                      |J2K_T1_SIG_NE | J2K_T1_SIG_NW | J2K_T1_SIG_SE | J2K_T1_SIG_SW)
+// flags determining sign bit of neighbour coefficients
+#define J2K_T1_SGN_N  0x0100
+#define J2K_T1_SGN_S  0x0200
+#define J2K_T1_SGN_W  0x0400
+#define J2K_T1_SGN_E  0x0800
+
+#define J2K_T1_VIS    0x1000
+#define J2K_T1_SIG    0x2000
+#define J2K_T1_REF    0x4000
+
+#define J2K_T1_SGN    0x8000
+
+// Codeblock coding styles
+#define J2K_CBLK_BYPASS    0x01 // Selective arithmetic coding bypass
+#define J2K_CBLK_RESET     0x02 // Reset context probabilities
+#define J2K_CBLK_TERMALL   0x04 // Terminate after each coding pass
+#define J2K_CBLK_VSC       0x08 // Vertical stripe causal context formation
+#define J2K_CBLK_PREDTERM  0x10 // Predictable termination
+#define J2K_CBLK_SEGSYM    0x20 // Segmentation symbols present
+
+// Coding styles
+#define J2K_CSTY_PREC      0x01 // Precincts defined in coding style
+#define J2K_CSTY_SOP       0x02 // SOP marker present
+#define J2K_CSTY_EPH       0x04 // EPH marker present
+
+typedef struct {
+    int data[J2K_MAX_CBLKW][J2K_MAX_CBLKH];
+    int flags[J2K_MAX_CBLKW+2][J2K_MAX_CBLKH+2]; ///< J2K_T1_* bits; ff_j2k_set_significant() indexes it with a +1 offset, leaving a one-entry border
+    MqcState mqc;
+} J2kT1Context;
+
+typedef struct J2kTgtNode { ///< tag tree node
+    uint8_t val;
+    uint8_t vis;
+    struct J2kTgtNode *parent; ///< node one level up in the tree, NULL for the root
+} J2kTgtNode;
+
+typedef struct {
+    uint8_t nreslevels;       ///< number of resolution levels
+    uint8_t log2_cblk_width,
+            log2_cblk_height; ///< exponent of codeblock size
+    uint8_t transform;        ///< DWT type
+    uint8_t csty;             ///< coding style
+    uint8_t log2_prec_width,
+            log2_prec_height; ///< precinct size
+    uint8_t nlayers;          ///< number of layers
+    uint8_t mct;              ///< multiple component transformation
+    uint8_t cblk_style;       ///< codeblock coding style
+} J2kCodingStyle;
+
+typedef struct {
+    uint8_t  expn[32 * 3]; ///< quantization exponent
+    uint16_t mant[32 * 3]; ///< quantization mantissa
+    uint8_t  quantsty;     ///< quantization style
+    uint8_t  nguardbits;   ///< number of guard bits
+} J2kQuantStyle;
+
+typedef struct {
+    uint16_t rate;
+    int64_t disto;
+} J2kPass;
+
+typedef struct {
+    uint8_t npasses;
+    uint8_t ninclpasses; ///< number coding of passes included in codestream
+    uint8_t nonzerobits;
+    uint16_t length;
+    uint16_t lengthinc;
+    uint8_t lblock;
+    uint8_t zero;
+    uint8_t data[8192];
+    J2kPass passes[100];
+} J2kCblk; ///< code block
+
+typedef struct {
+    uint16_t xi0, xi1, yi0, yi1; ///< codeblock indexes ([xi0, xi1))
+    J2kTgtNode *zerobits;
+    J2kTgtNode *cblkincl;
+} J2kPrec; ///< precinct
+
+typedef struct {
+    uint16_t coord[2][2]; ///< border coordinates {{x0, x1}, {y0, y1}}
+    uint16_t codeblock_width, codeblock_height;
+    uint16_t cblknx, cblkny;
+    uint32_t stepsize; ///< quantization stepsize (* 2^13)
+    J2kPrec *prec;
+    J2kCblk *cblk;
+} J2kBand; ///< subband
+
+typedef struct {
+    uint8_t nbands;
+    uint16_t coord[2][2]; ///< border coordinates {{x0, x1}, {y0, y1}}
+    uint16_t num_precincts_x, num_precincts_y; ///< number of precincts in x/y direction
+    uint8_t log2_prec_width, log2_prec_height; ///< exponent of precinct size
+    J2kBand *band;
+} J2kResLevel; ///< resolution level
+
+typedef struct {
+   J2kResLevel *reslevel;
+   DWTContext dwt;
+   int *data;
+   uint16_t coord[2][2]; ///< border coordinates {{x0, x1}, {y0, y1}}
+} J2kComponent;
+
+/* debug routines */
+#if 0
+#undef fprintf
+#undef printf
+void ff_j2k_printv(int *tab, int l);
+void ff_j2k_printu(uint8_t *tab, int l);
+#endif
+
+/* misc tools */
+static inline int ff_j2k_ceildivpow2(int a, int b)
+{
+    return (a + (1 << b) - 1)>> b; /* a / 2^b rounded up (for non-negative a) */
+}
+
+static inline int ff_j2k_ceildiv(int a, int b)
+{
+    return (a + b - 1) / b; /* a / b rounded up, for a >= 0 and b > 0 */
+}
+
+/* tag tree routines */
+J2kTgtNode *ff_j2k_tag_tree_init(int w, int h);
+
+/* TIER-1 routines */
+void ff_j2k_init_tier1_luts(void);
+
+void ff_j2k_set_significant(J2kT1Context *t1, int x, int y, int negative);
+
+extern uint8_t ff_j2k_nbctxno_lut[256][4];
+
+static inline int ff_j2k_getnbctxno(int flag, int bandno)
+{
+    return ff_j2k_nbctxno_lut[flag&255][bandno]; /* only the 8 neighbour-significance bits select the row */
+}
+
+static inline int ff_j2k_getrefctxno(int flag)
+{
+    static const uint8_t refctxno_lut[2][2] = {{14, 15}, {16, 16}};
+    return refctxno_lut[(flag>>14)&1][(flag & 255) != 0]; /* row: J2K_T1_REF bit; column: any neighbour significant */
+}
+
+extern uint8_t ff_j2k_sgnctxno_lut[16][16], ff_j2k_xorbit_lut[16][16];
+
+static inline int ff_j2k_getsgnctxno(int flag, int *xorbit)
+{
+    *xorbit = ff_j2k_xorbit_lut[flag&15][(flag>>8)&15]; /* indices: flag bits 0-3 (N/E/W/S significance) and bits 8-11 (signs) */
+    return  ff_j2k_sgnctxno_lut[flag&15][(flag>>8)&15];
+}
+
+int ff_j2k_init_component(J2kComponent *comp, J2kCodingStyle *codsty, J2kQuantStyle *qntsty, int cbps, int dx, int dy);
+void ff_j2k_reinit(J2kComponent *comp, J2kCodingStyle *codsty);
+void ff_j2k_cleanup(J2kComponent *comp, J2kCodingStyle *codsty);
+
+#endif /* AVCODEC_J2K_H */
diff --git a/libavcodec/j2k_dwt.c b/libavcodec/j2k_dwt.c
new file mode 100644
index 0000000000..9ba770ad83
--- /dev/null
+++ b/libavcodec/j2k_dwt.c
@@ -0,0 +1,384 @@
+/*
+ * Discrete wavelet transform
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * Discrete wavelet transform
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include "j2k_dwt.h"
+
+const static float scale97[] = {1.625786, 1.230174};
+
+static inline void extend53(int *p, int i0, int i1)
+{ /* extend p[i0..i1) by two mirrored samples on each side */
+    p[i0 - 1] = p[i0 + 1]; /* mirror about p[i0] ... */
+    p[i1    ] = p[i1 - 2]; /* ... and about p[i1 - 1] */
+    p[i0 - 2] = p[i0 + 2]; /* for very short ranges this reads entries written just above, so the order matters */
+    p[i1 + 1] = p[i1 - 3];
+}
+
+static inline void extend97(float *p, int i0, int i1)
+{
+    int i;
+    /* extend p[i0..i1) by four samples mirrored about p[i0] and four about p[i1 - 1] */
+    for (i = 1; i <= 4; i++){
+        p[i0 - i] = p[i0 + i];
+        p[i1 + i - 1] = p[i1 - i - 1];
+    }
+}
+
+static void sd_1d53(int *p, int i0, int i1)
+{
+    int i;
+    /* in-place integer lifting on p[i0..i1); a single sample is left as is */
+    if (i1 == i0 + 1)
+        return;
+
+    extend53(p, i0, i1);
+    /* step 1 rewrites the odd-indexed samples, step 2 the even-indexed ones */
+    for (i = (i0+1)/2 - 1; i < (i1+1)/2; i++)
+        p[2*i+1] -= (p[2*i] + p[2*i+2]) >> 1;
+    for (i = (i0+1)/2; i < (i1+1)/2; i++)
+        p[2*i] += (p[2*i-1] + p[2*i+1] + 2) >> 2;
+}
+
+static void dwt_encode53(DWTContext *s, int *t)
+{
+    int lev,
+        w = s->linelen[s->ndeclevels-1][0]; /* used as the row stride of t */
+    int *line = s->linebuf;
+    line += 3; /* keep room below the first sample: extend53() writes at negative offsets */
+    /* transform t in place, level ndeclevels-1 down to 0, rows first, then columns */
+    for (lev = s->ndeclevels-1; lev >= 0; lev--){
+        int lh = s->linelen[lev][0],
+            lv = s->linelen[lev][1],
+            mh = s->mod[lev][0],
+            mv = s->mod[lev][1],
+            lp;
+        int *l;
+
+        // HOR_SD
+        l = line + mh;
+        for (lp = 0; lp < lv; lp++){
+            int i, j = 0;
+
+            for (i = 0; i < lh; i++)
+                l[i] = t[w*lp + i];
+
+            sd_1d53(line, mh, mh + lh);
+
+            // copy back and deinterleave
+            for (i =   mh; i < lh; i+=2, j++) /* samples at even positions of line[] first ... */
+                t[w*lp + j] = l[i];
+            for (i = 1-mh; i < lh; i+=2, j++) /* ... then the ones at odd positions */
+                t[w*lp + j] = l[i];
+        }
+
+        // VER_SD
+        l = line + mv;
+        for (lp = 0; lp < lh; lp++) {
+            int i, j = 0;
+
+            for (i = 0; i < lv; i++)
+                l[i] = t[w*i + lp];
+
+            sd_1d53(line, mv, mv + lv);
+
+            // copy back and deinterleave
+            for (i =   mv; i < lv; i+=2, j++)
+                t[w*j + lp] = l[i];
+            for (i = 1-mv; i < lv; i+=2, j++)
+                t[w*j + lp] = l[i];
+        }
+    }
+}
+
+static void sd_1d97(float *p, int i0, int i1)
+{
+    int i;
+    /* in-place floating-point lifting on p[i0..i1); a single sample is left as is */
+    if (i1 == i0 + 1)
+        return;
+
+    extend97(p, i0, i1);
+    i0++; i1++;
+    /* four steps, alternately rewriting odd- and even-indexed samples; each loop covers a narrower index range */
+    for (i = i0/2 - 2; i < i1/2 + 1; i++)
+        p[2*i+1] -= 1.586134 * (p[2*i] + p[2*i+2]);
+    for (i = i0/2 - 1; i < i1/2 + 1; i++)
+        p[2*i] -= 0.052980 * (p[2*i-1] + p[2*i+1]);
+    for (i = i0/2 - 1; i < i1/2; i++)
+        p[2*i+1] += 0.882911 * (p[2*i] + p[2*i+2]);
+    for (i = i0/2; i < i1/2; i++)
+        p[2*i] += 0.443506 * (p[2*i-1] + p[2*i+1]);
+}
+
+static void dwt_encode97(DWTContext *s, int *t)
+{
+    int lev,
+        w = s->linelen[s->ndeclevels-1][0]; /* used as the row stride of t */
+    float *line = s->linebuf;
+    line += 5; /* keep room below the first sample: extend97() and the lifting loops use negative offsets */
+    /* transform t in place, level ndeclevels-1 down to 0, rows first, then columns */
+    for (lev = s->ndeclevels-1; lev >= 0; lev--){
+        int lh = s->linelen[lev][0],
+            lv = s->linelen[lev][1],
+            mh = s->mod[lev][0],
+            mv = s->mod[lev][1],
+            lp;
+        float *l;
+
+        // HOR_SD
+        l = line + mh;
+        for (lp = 0; lp < lv; lp++){
+            int i, j = 0;
+
+            for (i = 0; i < lh; i++)
+                l[i] = t[w*lp + i];
+
+            sd_1d97(line, mh, mh + lh);
+
+            // copy back and deinterleave
+            for (i =   mh; i < lh; i+=2, j++) /* result is converted back to int on store */
+                t[w*lp + j] = scale97[mh] * l[i] / 2;
+            for (i = 1-mh; i < lh; i+=2, j++) /* NOTE(review): same scale97 entry as the loop above - confirm intended */
+                t[w*lp + j] = scale97[mh] * l[i] / 2;
+        }
+
+        // VER_SD
+        l = line + mv;
+        for (lp = 0; lp < lh; lp++) {
+            int i, j = 0;
+
+            for (i = 0; i < lv; i++)
+                l[i] = t[w*i + lp];
+
+            sd_1d97(line, mv, mv + lv);
+
+            // copy back and deinterleave
+            for (i =   mv; i < lv; i+=2, j++)
+                t[w*j + lp] = scale97[mv] * l[i] / 2;
+            for (i = 1-mv; i < lv; i+=2, j++)
+                t[w*j + lp] = scale97[mv] * l[i] / 2;
+        }
+    }
+}
+
+static void sr_1d53(int *p, int i0, int i1)
+{
+    int i;
+    /* applies the two steps of sd_1d53() in reverse order with opposite signs; a single sample is left as is */
+    if (i1 == i0 + 1)
+        return;
+
+    extend53(p, i0, i1);
+    /* even-indexed samples first, then the odd-indexed ones */
+    for (i = i0/2; i < i1/2 + 1; i++)
+        p[2*i] -= (p[2*i-1] + p[2*i+1] + 2) >> 2;
+    for (i = i0/2; i < i1/2; i++)
+        p[2*i+1] += (p[2*i] + p[2*i+2]) >> 1;
+}
+
+static void dwt_decode53(DWTContext *s, int *t)
+{
+    int lev,
+        w = s->linelen[s->ndeclevels-1][0]; /* used as the row stride of t */
+    int *line = s->linebuf;
+    line += 3; /* keep room below the first sample: extend53() writes at negative offsets */
+    /* rebuild t in place, level 0 up to ndeclevels-1, rows first, then columns */
+    for (lev = 0; lev < s->ndeclevels; lev++){
+        int lh = s->linelen[lev][0],
+            lv = s->linelen[lev][1],
+            mh = s->mod[lev][0],
+            mv = s->mod[lev][1],
+            lp;
+        int *l;
+
+        // HOR_SR
+        l = line + mh;
+        for (lp = 0; lp < lv; lp++){
+            int i, j = 0;
+            // copy with interleaving
+            for (i =   mh; i < lh; i+=2, j++)
+                l[i] = t[w*lp + j];
+            for (i = 1-mh; i < lh; i+=2, j++)
+                l[i] = t[w*lp + j];
+
+            sr_1d53(line, mh, mh + lh);
+
+            for (i = 0; i < lh; i++)
+                t[w*lp + i] = l[i];
+        }
+
+        // VER_SR
+        l = line + mv;
+        for (lp = 0; lp < lh; lp++){
+            int i, j = 0;
+            // copy with interleaving
+            for (i =   mv; i < lv; i+=2, j++)
+                l[i] = t[w*j + lp];
+            for (i = 1-mv; i < lv; i+=2, j++)
+                l[i] = t[w*j + lp];
+
+            sr_1d53(line, mv, mv + lv);
+
+            for (i = 0; i < lv; i++)
+                t[w*i + lp] = l[i];
+        }
+    }
+}
+
+static void sr_1d97(float *p, int i0, int i1)
+{
+    int k;                          /* always an even sample index */
+
+    if (i1 - i0 == 1)               /* a single sample is left untouched */
+        return;
+
+    extend97(p, i0, i1);
+
+    for (k = 2 * (i0/2 - 1); k < 2 * (i1/2 + 2); k += 2)
+        p[k] -= 0.443506 * (p[k-1] + p[k+1]);
+    for (k = 2 * (i0/2 - 1); k < 2 * (i1/2 + 1); k += 2)
+        p[k+1] -= 0.882911 * (p[k] + p[k+2]);
+    for (k = 2 * (i0/2); k < 2 * (i1/2 + 1); k += 2)
+        p[k] += 0.052980 * (p[k-1] + p[k+1]);
+    for (k = 2 * (i0/2); k < 2 * (i1/2); k += 2)
+        p[k+1] += 1.586134 * (p[k] + p[k+2]);
+}
+
+static void dwt_decode97(DWTContext *s, int *t) // inverse 9/7 transform of t, in place, level 0 first
+{
+    int lev,
+        w = s->ndeclevels ? s->linelen[s->ndeclevels-1][0] : 0; // row stride of t; never reads linelen[-1]
+    float *line = s->linebuf;
+    line += 5; // linebuf holds maxlen + 12 floats; presumably spare room for extend97() -- confirm
+
+    for (lev = 0; lev < s->ndeclevels; lev++){
+        int lh = s->linelen[lev][0], // horizontal line length of this level
+            lv = s->linelen[lev][1], // vertical line length of this level
+            mh = s->mod[lev][0],     // x0 & 1 of this level
+            mv = s->mod[lev][1],     // y0 & 1 of this level
+            lp;
+        float *l;
+
+        // HOR_SD: rebuild each of the lv rows
+        l = line + mh;
+        for (lp = 0; lp < lv; lp++){
+            int i, j = 0;
+            // copy with interleaving: j keeps running, so the 2nd loop reads the 2nd half of the row
+            for (i =   mh; i < lh; i+=2, j++)
+                l[i] = scale97[1-mh] * t[w*lp + j];
+            for (i = 1-mh; i < lh; i+=2, j++)
+                l[i] = scale97[1-mh] * t[w*lp + j];
+
+            sr_1d97(line, mh, mh + lh);
+
+            for (i = 0; i < lh; i++)
+                t[w*lp + i] = l[i]; // float -> int conversion happens here
+        }
+
+        // VER_SD: rebuild each of the lh columns
+        l = line + mv;
+        for (lp = 0; lp < lh; lp++){
+            int i, j = 0;
+            // copy with interleaving: same scheme as above, walking down column lp
+            for (i =   mv; i < lv; i+=2, j++)
+                l[i] = scale97[1-mv] * t[w*j + lp];
+            for (i = 1-mv; i < lv; i+=2, j++)
+                l[i] = scale97[1-mv] * t[w*j + lp];
+
+            sr_1d97(line, mv, mv + lv);
+
+            for (i = 0; i < lv; i++)
+                t[w*i + lp] = l[i]; // float -> int conversion happens here
+        }
+    }
+}
+
+int ff_j2k_dwt_init(DWTContext *s, uint16_t border[2][2], int decomp_levels, int type)
+{
+    int i, j, lev = decomp_levels, maxlen,
+        b[2][2];
+
+    if (decomp_levels < 0 || decomp_levels > FF_DWT_MAX_DECLVLS) // linelen[]/mod[] hold no more levels
+        return AVERROR(EINVAL);
+    s->ndeclevels = decomp_levels;
+    s->type = type;
+
+    for (i = 0; i < 2; i++)
+        for(j = 0; j < 2; j++)
+            b[i][j] = border[i][j];
+
+    maxlen = FFMAX(b[0][1] - b[0][0],
+                   b[1][1] - b[1][0]); // longest line of the full-size region
+
+    while(--lev >= 0){ // highest index = full size; every lower level spans ceil(coord / 2)
+        for (i = 0; i < 2; i++){
+            s->linelen[lev][i] = b[i][1] - b[i][0];
+            s->mod[lev][i] = b[i][0] & 1;
+            for (j = 0; j < 2; j++)
+                b[i][j] = (b[i][j] + 1) >> 1;
+        }
+    }
+    if (type == FF_DWT97)
+        s->linebuf = av_malloc((maxlen + 12) * sizeof(float)); // 9/7 works on a float line
+    else if (type == FF_DWT53)
+        s->linebuf = av_malloc((maxlen + 6) * sizeof(int));    // 5/3 works on an int line
+    else
+        return -1; // unknown transform type; s->linebuf is left untouched
+    if (!s->linebuf)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+int ff_j2k_dwt_encode(DWTContext *s, int *t)
+{
+    /* dispatch on the transform chosen at init time; anything else fails */
+    if (s->type == FF_DWT97)
+        dwt_encode97(s, t);
+    else if (s->type == FF_DWT53)
+        dwt_encode53(s, t);
+    else
+        return -1;
+
+    return 0;
+}
+
+int ff_j2k_dwt_decode(DWTContext *s, int *t)
+{
+    /* dispatch on the transform chosen at init time; anything else fails */
+    if (s->type == FF_DWT97)
+        dwt_decode97(s, t);
+    else if (s->type == FF_DWT53)
+        dwt_decode53(s, t);
+    else
+        return -1;
+
+    return 0;
+}
+
+void ff_j2k_dwt_destroy(DWTContext *s) // counterpart of ff_j2k_dwt_init()
+{
+    av_freep(&s->linebuf); // releases the line buffer that ff_j2k_dwt_init() obtained from av_malloc()
+}
diff --git a/libavcodec/j2k_dwt.h b/libavcodec/j2k_dwt.h
new file mode 100644
index 0000000000..a2a25a6891
--- /dev/null
+++ b/libavcodec/j2k_dwt.h
@@ -0,0 +1,63 @@
+/*
+ * Discrete wavelet transform
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_J2K_DWT_H
+#define AVCODEC_J2K_DWT_H
+
+/**
+ * Discrete wavelet transform
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include "avcodec.h"
+
+#define FF_DWT_MAX_DECLVLS 32 ///< max number of decomposition levels
+
+enum DWTType{
+    FF_DWT97, ///< 9/7 transform, computed on a float line buffer
+    FF_DWT53  ///< 5/3 transform, computed on an int line buffer
+};
+
+typedef struct {
+    ///line lengths {horizontal, vertical} in consecutive decomposition levels
+    uint16_t linelen[FF_DWT_MAX_DECLVLS][2];
+    uint8_t  mod[FF_DWT_MAX_DECLVLS][2]; ///< coordinates (x0, y0) of decomp. levels mod 2
+    uint8_t  ndeclevels;                 ///< number of decomposition levels
+    uint8_t  type;                       ///< FF_DWT97 (0) for 9/7; FF_DWT53 (1) for 5/3
+    void     *linebuf;                   ///< buffer used by transform (int or float)
+} DWTContext;
+
+/**
+ * initialize DWT
+ * @param s DWT context
+ * @param border coordinates of transformed region {{x0, x1}, {y0, y1}}
+ * @param decomp_levels number of decomposition levels
+ * @param type FF_DWT97 (0) for DWT 9/7; FF_DWT53 (1) for DWT 5/3
+ */
+int ff_j2k_dwt_init(DWTContext *s, uint16_t border[2][2], int decomp_levels, int type);
+
+int ff_j2k_dwt_encode(DWTContext *s, int *t); // forward transform of t in place; 0, or -1 on unknown type
+int ff_j2k_dwt_decode(DWTContext *s, int *t); // inverse transform of t in place; 0, or -1 on unknown type
+
+void ff_j2k_dwt_destroy(DWTContext *s); // frees the line buffer allocated by ff_j2k_dwt_init()
+
+#endif /* AVCODEC_J2K_DWT_H */
diff --git a/libavcodec/j2kdec.c b/libavcodec/j2kdec.c
new file mode 100644
index 0000000000..112c0ffd3e
--- /dev/null
+++ b/libavcodec/j2kdec.c
@@ -0,0 +1,1053 @@
+/*
+ * JPEG2000 image decoder
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * JPEG2000 image decoder
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "j2k.h"
+#include "libavutil/common.h"
+
+#define JP2_SIG_TYPE    0x6A502020
+#define JP2_SIG_VALUE   0x0D0A870A
+#define JP2_CODESTREAM  0x6A703263
+
+#define HAD_COC 0x01
+#define HAD_QCC 0x02
+
+typedef struct {
+   J2kComponent *comp;       ///< ncomponents entries, allocated in get_siz()
+   uint8_t properties[4];    ///< per-component HAD_COC / HAD_QCC flags -- presumably for tile-part headers, confirm
+   J2kCodingStyle codsty[4]; ///< per-component coding style, seeded from the image defaults in get_sot()
+   J2kQuantStyle  qntsty[4]; ///< per-component quantization style, seeded from the image defaults in get_sot()
+} J2kTile;
+
+typedef struct {
+    AVCodecContext *avctx;
+    AVFrame picture; ///< output frame, (re)acquired through avctx->get_buffer() in get_siz()
+
+    int width, height; ///< image width and height
+    int image_offset_x, image_offset_y; ///< X0Siz, Y0Siz of the SIZ segment
+    int tile_offset_x, tile_offset_y; ///< XT0Siz, YT0Siz of the SIZ segment
+    uint8_t cbps[4]; ///< bits per sample in particular components
+    uint8_t sgnd[4]; ///< if a component is signed
+    uint8_t properties[4]; ///< per-component HAD_COC / HAD_QCC flags -- presumably for the main header, confirm
+    int cdx[4], cdy[4]; ///< XRsiz_i, YRsiz_i of the SIZ segment
+    int precision; ///< largest cbps[] value seen so far
+    int ncomponents; ///< CSiz of the SIZ segment
+    int tile_width, tile_height; ///< tile size
+    int numXtiles, numYtiles; ///< tile grid dimensions, computed in get_siz()
+    int maxtilelen;
+
+    J2kCodingStyle codsty[4]; ///< per-component defaults, copied into a tile by get_sot()
+    J2kQuantStyle  qntsty[4]; ///< per-component defaults, copied into a tile by get_sot()
+
+    uint8_t *buf_start;
+    uint8_t *buf; ///< read cursor; while get_bits() is in use it points at the byte being consumed
+    uint8_t *buf_end; ///< bound used by the "buf_end - buf" size checks
+    int bit_index; ///< bits of *buf not yet returned by get_bits()
+
+    int16_t curtileno; ///< Isot of the most recent SOT segment
+
+    J2kTile *tile; ///< numXtiles * numYtiles tiles, allocated in get_siz()
+} J2kDecoderContext;
+
+static int get_bits(J2kDecoderContext *s, int n) // read n bits, MSB first; returns the value or AVERROR(EINVAL)
+{
+    int res = 0;
+    if (n > 0 && s->buf_end - s->buf <= ((n - s->bit_index + 7) >> 3)) // need the current byte + every further whole byte
+        return AVERROR(EINVAL);
+    while (--n >= 0){
+        res <<= 1;
+        if (s->bit_index == 0){ // current byte used up: move on to the next one
+            s->bit_index = 7 + (*s->buf != 0xff); // the byte following a 0xff contributes only 7 bits
+            s->buf++;
+        }
+        s->bit_index--;
+        res |= (*s->buf >> s->bit_index) & 1;
+    }
+    return res;
+}
+
+static void j2k_flush(J2kDecoderContext *s)
+{
+    /* drop what is left of the current byte and restart on a byte boundary;
+     * when the current byte is 0xff one further byte is skipped as well */
+    s->buf      += 1 + (*s->buf == 0xff);
+    s->bit_index = 8;
+}
+#if 0 /* debug-only helpers, compiled out. NOTE(review): dump() uses an undeclared 'codsty' and would not build if enabled */
+void printcomp(J2kComponent *comp) // prints the component row by row through ff_j2k_printv()
+{
+    int i;
+    for (i = 0; i < comp->y1 - comp->y0; i++)
+        ff_j2k_printv(comp->data + i * (comp->x1 - comp->x0), comp->x1 - comp->x0);
+}
+
+static void nspaces(FILE *fd, int n) // writes n spaces to fd (indentation for dump())
+{
+    while(n--) putc(' ', fd);
+}
+
+static void dump(J2kDecoderContext *s, FILE *fd) // prints the tile / component / reslevel / band / precinct tree
+{
+    int tileno, compno, reslevelno, bandno, precno;
+    fprintf(fd, "XSiz = %d, YSiz = %d, tile_width = %d, tile_height = %d\n"
+                "numXtiles = %d, numYtiles = %d, ncomponents = %d\n"
+                "tiles:\n",
+            s->width, s->height, s->tile_width, s->tile_height,
+            s->numXtiles, s->numYtiles, s->ncomponents);
+    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
+        J2kTile *tile = s->tile + tileno;
+        nspaces(fd, 2);
+        fprintf(fd, "tile %d:\n", tileno);
+        for(compno = 0; compno < s->ncomponents; compno++){
+            J2kComponent *comp = tile->comp + compno;
+            nspaces(fd, 4);
+            fprintf(fd, "component %d:\n", compno);
+            nspaces(fd, 4);
+            fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d\n",
+                        comp->x0, comp->x1, comp->y0, comp->y1);
+            for(reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){
+                J2kResLevel *reslevel = comp->reslevel + reslevelno;
+                nspaces(fd, 6);
+                fprintf(fd, "reslevel %d:\n", reslevelno);
+                nspaces(fd, 6);
+                fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d, nbands = %d\n",
+                        reslevel->x0, reslevel->x1, reslevel->y0,
+                        reslevel->y1, reslevel->nbands);
+                for(bandno = 0; bandno < reslevel->nbands; bandno++){
+                    J2kBand *band = reslevel->band + bandno;
+                    nspaces(fd, 8);
+                    fprintf(fd, "band %d:\n", bandno);
+                    nspaces(fd, 8);
+                    fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d,"
+                                "codeblock_width = %d, codeblock_height = %d cblknx = %d cblkny = %d\n",
+                                band->x0, band->x1,
+                                band->y0, band->y1,
+                                band->codeblock_width, band->codeblock_height,
+                                band->cblknx, band->cblkny);
+                    for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++){
+                        J2kPrec *prec = band->prec + precno;
+                        nspaces(fd, 10);
+                        fprintf(fd, "prec %d:\n", precno);
+                        nspaces(fd, 10);
+                        fprintf(fd, "xi0 = %d, xi1 = %d, yi0 = %d, yi1 = %d\n",
+                                     prec->xi0, prec->xi1, prec->yi0, prec->yi1);
+                    }
+                }
+            }
+        }
+    }
+}
+#endif
+
+/** decode the value stored in node, reading bits only until it is known or reaches threshold; returns it, or a negative get_bits() error */
+static int tag_tree_decode(J2kDecoderContext *s, J2kTgtNode *node, int threshold)
+{
+    J2kTgtNode *stack[30]; // NOTE(review): no depth check -- assumes fewer than 30 unvisited ancestors
+    int sp = -1, curval = 0;
+
+    while(node && !node->vis){ // collect the not-yet-final nodes from 'node' upwards
+        stack[++sp] = node;
+        node = node->parent;
+    }
+
+    if (node)
+        curval = node->val; // start from the nearest already-final ancestor
+    else
+        curval = stack[sp]->val; // none: start from the topmost collected node
+
+    while(curval < threshold && sp >= 0){ // walk back down towards the requested node
+        if (curval < stack[sp]->val)
+            curval = stack[sp]->val;
+        while (curval < threshold){
+            int ret;
+            if ((ret = get_bits(s, 1)) > 0){ // 1 bit: this node's value is final
+                stack[sp]->vis++;
+                break;
+            } else if (!ret) // 0 bit: the value is at least one larger
+                curval++;
+            else
+                return ret; // read error
+        }
+        stack[sp]->val = curval; // remember the progress for later calls
+        sp--;
+    }
+    return curval;
+}
+
+/* marker segments */
+/** parse SIZ: sizes and offsets of image and tiles, number of components; allocates the tiles and the output frame */
+static int get_siz(J2kDecoderContext *s)
+{
+    int i, ret;
+
+    if (s->buf_end - s->buf < 36) // fixed part: 2 + 8 * 4 + 2 bytes
+        return AVERROR(EINVAL);
+
+                        bytestream_get_be16(&s->buf); // Rsiz (skipped)
+             s->width = bytestream_get_be32(&s->buf); // width
+            s->height = bytestream_get_be32(&s->buf); // height
+    s->image_offset_x = bytestream_get_be32(&s->buf); // X0Siz
+    s->image_offset_y = bytestream_get_be32(&s->buf); // Y0Siz
+
+        s->tile_width = bytestream_get_be32(&s->buf); // XTSiz
+       s->tile_height = bytestream_get_be32(&s->buf); // YTSiz
+     s->tile_offset_x = bytestream_get_be32(&s->buf); // XT0Siz
+     s->tile_offset_y = bytestream_get_be32(&s->buf); // YT0Siz
+       s->ncomponents = bytestream_get_be16(&s->buf); // CSiz
+    if (s->ncomponents < 1 || s->ncomponents > 4 || s->tile_width <= 0 || s->tile_height <= 0 ||
+        s->buf_end - s->buf < 3 * s->ncomponents) // 4-entry arrays below; tile size is a divisor; 3 bytes/component
+        return AVERROR(EINVAL);
+
+    for (i = 0; i < s->ncomponents; i++){ // Ssiz_i XRsiz_i, YRsiz_i
+        uint8_t x = bytestream_get_byte(&s->buf);
+        s->cbps[i] = (x & 0x7f) + 1;
+        s->precision = FFMAX(s->cbps[i], s->precision);
+        s->sgnd[i] = !!(x & 0x80); // top bit of Ssiz flags a signed component
+        s->cdx[i] = bytestream_get_byte(&s->buf);
+        s->cdy[i] = bytestream_get_byte(&s->buf);
+    }
+
+    s->numXtiles = ff_j2k_ceildiv(s->width - s->tile_offset_x, s->tile_width);
+    s->numYtiles = ff_j2k_ceildiv(s->height - s->tile_offset_y, s->tile_height);
+
+    s->tile = av_mallocz(s->numXtiles * s->numYtiles * sizeof(J2kTile));
+    if (!s->tile)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < s->numXtiles * s->numYtiles; i++){
+        J2kTile *tile = s->tile + i;
+
+        tile->comp = av_mallocz(s->ncomponents * sizeof(J2kComponent));
+        if (!tile->comp)
+            return AVERROR(ENOMEM);
+    }
+
+    s->avctx->width = s->width - s->image_offset_x;
+    s->avctx->height = s->height - s->image_offset_y;
+
+    switch(s->ncomponents){ // NOTE(review): 2 components leave pix_fmt untouched
+        case 1: if (s->precision > 8) {
+                    s->avctx->pix_fmt    = PIX_FMT_GRAY16;
+                } else s->avctx->pix_fmt = PIX_FMT_GRAY8;
+                break;
+        case 3: if (s->precision > 8) {
+                    s->avctx->pix_fmt    = PIX_FMT_RGB48;
+                } else s->avctx->pix_fmt = PIX_FMT_RGB24;
+                break;
+        case 4: s->avctx->pix_fmt = PIX_FMT_BGRA; break;
+    }
+
+    if (s->picture.data[0])
+        s->avctx->release_buffer(s->avctx, &s->picture);
+
+    if ((ret = s->avctx->get_buffer(s->avctx, &s->picture)) < 0)
+        return ret;
+
+    s->picture.pict_type = FF_I_TYPE;
+    s->picture.key_frame = 1;
+
+    return 0;
+}
+
+/** get common part for COD and COC segments; c->csty must already be set; returns 0 or a negative error */
+static int get_cox(J2kDecoderContext *s, J2kCodingStyle *c)
+{
+    if (s->buf_end - s->buf < 5)
+        return AVERROR(EINVAL);
+          c->nreslevels = bytestream_get_byte(&s->buf) + 1; // num of resolution levels - 1
+     c->log2_cblk_width = bytestream_get_byte(&s->buf) + 2; // cblk width
+    c->log2_cblk_height = bytestream_get_byte(&s->buf) + 2; // cblk height
+
+    c->cblk_style = bytestream_get_byte(&s->buf);
+    if (c->cblk_style != 0){ // cblk style
+        av_log(s->avctx, AV_LOG_ERROR, "no extra cblk styles supported\n");
+        return -1;
+    }
+    c->transform = bytestream_get_byte(&s->buf); // transformation
+    if (c->csty & J2K_CSTY_PREC) { // one precinct-size byte per resolution level follows; it is skipped
+        if (s->buf_end - s->buf < c->nreslevels)
+            return AVERROR(EINVAL);
+        s->buf += c->nreslevels;
+    }
+    return 0;
+}
+
+/** get coding parameters for a particular tile or whole image; components flagged HAD_COC keep their own */
+static int get_cod(J2kDecoderContext *s, J2kCodingStyle *c, uint8_t *properties)
+{
+    J2kCodingStyle tmp;
+    int compno, ret;
+
+    if (s->buf_end - s->buf < 5)
+        return AVERROR(EINVAL);
+
+    tmp.log2_prec_width  =
+    tmp.log2_prec_height = 15;
+
+    tmp.csty = bytestream_get_byte(&s->buf);
+
+    if (bytestream_get_byte(&s->buf)){ // progression level
+        av_log(s->avctx, AV_LOG_ERROR, "only LRCP progression supported\n");
+        return -1;
+    }
+
+    tmp.nlayers = bytestream_get_be16(&s->buf);
+        tmp.mct = bytestream_get_byte(&s->buf); // multiple component transformation
+    if ((ret = get_cox(s, &tmp)) < 0) // do not hand out a half-parsed style
+        return ret;
+    for (compno = 0; compno < s->ncomponents; compno++){
+        if (!(properties[compno] & HAD_COC))
+            memcpy(c + compno, &tmp, sizeof(J2kCodingStyle));
+    }
+    return 0;
+}
+
+/** get coding parameters for a component in the whole image on a particular tile; returns 0 or a negative error */
+static int get_coc(J2kDecoderContext *s, J2kCodingStyle *c, uint8_t *properties)
+{
+    int compno, ret;
+
+    if (s->buf_end - s->buf < 2)
+        return AVERROR(EINVAL);
+    compno = bytestream_get_byte(&s->buf);
+    if (compno >= s->ncomponents || compno >= FF_ARRAY_ELEMS(s->codsty)) // c and properties hold 4 entries
+        return AVERROR(EINVAL);
+    c += compno;
+    c->csty = bytestream_get_byte(&s->buf);
+    if ((ret = get_cox(s, c)) < 0)
+        return ret;
+    properties[compno] |= HAD_COC; // a later COD must not overwrite this component
+    return 0;
+}
+
+/** get common part for QCD and QCC segments; n is the segment length used to derive the entry count */
+static int get_qcx(J2kDecoderContext *s, int n, J2kQuantStyle *q)
+{
+    int i, x;
+
+    if (s->buf_end - s->buf < 1)
+        return AVERROR(EINVAL);
+
+    x = bytestream_get_byte(&s->buf); // Sqcd
+
+    q->nguardbits = x >> 5;
+      q->quantsty = x & 0x1f;
+
+    if (q->quantsty == J2K_QSTY_NONE){ // one byte per entry, exponent only
+        n -= 3;
+        if (n > 32 * 3 || s->buf_end - s->buf < n) // same 32 * 3 entry bound as the J2K_QSTY_SI loop below
+            return AVERROR(EINVAL);
+        for (i = 0; i < n; i++)
+            q->expn[i] = bytestream_get_byte(&s->buf) >> 3;
+    } else if (q->quantsty == J2K_QSTY_SI){ // a single value, the others are derived from it
+        if (s->buf_end - s->buf < 2)
+            return AVERROR(EINVAL);
+        x = bytestream_get_be16(&s->buf);
+        q->expn[0] = x >> 11;
+        q->mant[0] = x & 0x7ff;
+        for (i = 1; i < 32 * 3; i++){
+            int curexpn = FFMAX(0, q->expn[0] - (i-1)/3);
+            q->expn[i] = curexpn;
+            q->mant[i] = q->mant[0];
+        }
+    } else{ // two bytes per entry: 5-bit exponent, 11-bit mantissa
+        n = (n - 3) >> 1;
+        if (n > 32 * 3 || s->buf_end - s->buf < 2 * n) // each entry consumes two bytes
+            return AVERROR(EINVAL);
+        for (i = 0; i < n; i++){
+            x = bytestream_get_be16(&s->buf);
+            q->expn[i] = x >> 11;
+            q->mant[i] = x & 0x7ff;
+        }
+    }
+    return 0;
+}
+
+/** get quantization parameters for a particular tile or a whole image; components flagged HAD_QCC keep their own */
+static int get_qcd(J2kDecoderContext *s, int n, J2kQuantStyle *q, uint8_t *properties)
+{
+    J2kQuantStyle tmp;
+    int compno, ret;
+
+    if ((ret = get_qcx(s, n, &tmp)) < 0) // hand the real error code on, as get_qcc() does
+        return ret;
+    for (compno = 0; compno < s->ncomponents; compno++)
+        if (!(properties[compno] & HAD_QCC))
+            memcpy(q + compno, &tmp, sizeof(J2kQuantStyle));
+    return 0;
+}
+
+/** get quantization parameters for a component in the whole image or in a particular tile */
+static int get_qcc(J2kDecoderContext *s, int n, J2kQuantStyle *q, uint8_t *properties)
+{
+    int compno;
+    if (s->buf_end - s->buf < 1)
+        return AVERROR(EINVAL);
+    compno = bytestream_get_byte(&s->buf);
+    if (compno >= s->ncomponents || compno >= FF_ARRAY_ELEMS(s->qntsty)) // q and properties hold 4 entries
+        return AVERROR(EINVAL);
+    properties[compno] |= HAD_QCC; // a later QCD must not overwrite this component
+    return get_qcx(s, n-1, q+compno); // the component index byte is already consumed
+}
+
+/** get start of tile segment; returns 0, or AVERROR(EINVAL) on truncated data or a tile index outside the grid */
+static int get_sot(J2kDecoderContext *s)
+{
+    if (s->buf_end - s->buf < 8) // Isot (2) + Psot (4) + TPsot (1) + TNsot (1)
+        return AVERROR(EINVAL);
+    s->curtileno = bytestream_get_be16(&s->buf); ///< Isot
+    if (s->curtileno < 0 || s->curtileno >= s->numXtiles * s->numYtiles)
+        return AVERROR(EINVAL);
+    s->buf += 4; ///< Psot (ignored)
+
+    if (!bytestream_get_byte(&s->buf)){ ///< TPsot
+        J2kTile *tile = s->tile + s->curtileno;
+
+        /* copy defaults */
+        memcpy(tile->codsty, s->codsty, s->ncomponents * sizeof(J2kCodingStyle));
+        memcpy(tile->qntsty, s->qntsty, s->ncomponents * sizeof(J2kQuantStyle));
+    }
+    bytestream_get_byte(&s->buf); ///< TNsot
+
+    return 0;
+}
+
+static int init_tile(J2kDecoderContext *s, int tileno) // set the coordinates of every component of a tile and initialise it
+{
+    int compno,
+        tilex = tileno % s->numXtiles, // column of the tile in the tile grid
+        tiley = tileno / s->numXtiles; // row of the tile in the tile grid
+    J2kTile *tile = s->tile + tileno;
+
+    if (!tile->comp) // the per-tile component array is allocated in get_siz()
+        return AVERROR(ENOMEM);
+    for (compno = 0; compno < s->ncomponents; compno++){
+        J2kComponent *comp = tile->comp + compno;
+        J2kCodingStyle *codsty = tile->codsty + compno;
+        J2kQuantStyle  *qntsty = tile->qntsty + compno;
+        int ret; // result of ff_j2k_init_component()
+
+        comp->coord[0][0] = FFMAX(tilex * s->tile_width + s->tile_offset_x, s->image_offset_x); // tile rectangle clipped to the image area
+        comp->coord[0][1] = FFMIN((tilex+1)*s->tile_width + s->tile_offset_x, s->width);
+        comp->coord[1][0] = FFMAX(tiley * s->tile_height + s->tile_offset_y, s->image_offset_y);
+        comp->coord[1][1] = FFMIN((tiley+1)*s->tile_height + s->tile_offset_y, s->height);
+
+        if (ret = ff_j2k_init_component(comp, codsty, qntsty, s->cbps[compno], s->cdx[compno], s->cdy[compno]))
+            return ret; // any non-zero result is passed on unchanged
+    }
+    return 0;
+}
+
+/** read the number of coding passes (variable-length code); returns the count or a negative get_bits() error */
+static int getnpasses(J2kDecoderContext *s)
+{
+    int num;
+    if (!get_bits(s, 1)) // "0" -> 1 pass; NOTE(review): a negative result is non-zero and falls through
+        return 1;
+    if (!get_bits(s, 1)) // "10" -> 2 passes
+        return 2;
+    if ((num = get_bits(s, 2)) != 3) // "11" + 2 bits other than 11 -> 3..5
+        return num < 0 ? num : 3 + num;
+    if ((num = get_bits(s, 5)) != 31) // + 5 bits other than 11111 -> 6..36
+        return num < 0 ? num : 6 + num;
+    num = get_bits(s, 7); // + 7 bits -> 37..164
+    return num < 0 ? num : 37 + num;
+}
+
+static int getlblockinc(J2kDecoderContext *s)
+{
+    int count, bit;
+    /* count the 1 bits in front of the terminating 0 bit (which is consumed too) */
+    for (count = 0; (bit = get_bits(s, 1)) != 0; count++)
+        if (bit < 0)
+            return bit;
+
+    return count;
+}
+
+static int decode_packet(J2kDecoderContext *s, J2kCodingStyle *codsty, J2kResLevel *rlevel, int precno,
+                         int layno, uint8_t *expn, int numgbits) // parse one packet header, then copy its code-block data
+{
+    int bandno, cblkny, cblknx, cblkno, ret;
+
+    if (!(ret = get_bits(s, 1))){ // first bit 0: nothing follows for this packet
+        j2k_flush(s);
+        return 0;
+    } else if (ret < 0)
+        return ret;
+
+    for (bandno = 0; bandno < rlevel->nbands; bandno++){ // header: what each code block contributes
+        J2kBand *band = rlevel->band + bandno;
+        J2kPrec *prec = band->prec + precno;
+        int pos = 0; // index of the code block inside the precinct (tag-tree leaf)
+
+        if (band->coord[0][0] == band->coord[0][1]
+        ||  band->coord[1][0] == band->coord[1][1])
+            continue; // empty band
+
+        for (cblkny = prec->yi0; cblkny < prec->yi1; cblkny++)
+            for(cblknx = prec->xi0, cblkno = cblkny * band->cblknx + cblknx; cblknx < prec->xi1; cblknx++, cblkno++, pos++){
+                J2kCblk *cblk = band->cblk + cblkno;
+                int incl, newpasses, llen;
+
+                if (cblk->npasses) // already included earlier: a single bit
+                    incl = get_bits(s, 1);
+                else // NOTE(review): a negative tag_tree_decode() result is lost in this comparison
+                    incl = tag_tree_decode(s, prec->cblkincl + pos, layno+1) == layno;
+                if (!incl)
+                    continue;
+                else if (incl < 0)
+                    return incl;
+
+                if (!cblk->npasses) // first inclusion: derive the number of non-zero bit planes
+                    cblk->nonzerobits = expn[bandno] + numgbits - 1 - tag_tree_decode(s, prec->zerobits + pos, 100);
+                if ((newpasses = getnpasses(s)) < 0)
+                    return newpasses;
+                if ((llen = getlblockinc(s)) < 0)
+                    return llen;
+                cblk->lblock += llen;
+                if ((ret = get_bits(s, av_log2(newpasses) + cblk->lblock)) < 0)
+                    return ret;
+                cblk->lengthinc = ret; // bytes this packet adds to the block
+                cblk->npasses += newpasses;
+            }
+    }
+    j2k_flush(s);
+
+    if (codsty->csty & J2K_CSTY_EPH) {
+        if (AV_RB16(s->buf) == J2K_EPH) {
+            s->buf += 2;
+        } else {
+            av_log(s->avctx, AV_LOG_ERROR, "EPH marker not found.\n");
+        }
+    }
+
+    for (bandno = 0; bandno < rlevel->nbands; bandno++){ // body: the announced bytes of every block
+        J2kBand *band = rlevel->band + bandno;
+        int yi;
+        for (yi = band->prec[precno].yi0; yi < band->prec[precno].yi1; yi++){
+            int xi;
+            for (xi = band->prec[precno].xi0; xi < band->prec[precno].xi1; xi++){
+                J2kCblk *cblk = band->cblk + yi * band->cblknx + xi; // same band-wide row stride as the header loop
+                if (s->buf_end - s->buf < cblk->lengthinc)
+                    return AVERROR(EINVAL);
+                bytestream_get_buffer(&s->buf, cblk->data, cblk->lengthinc); // NOTE(review): always writes at data[0], not data + length
+                cblk->length += cblk->lengthinc;
+                cblk->lengthinc = 0;
+            }
+        }
+    }
+    return 0;
+}
+
+static int decode_packets(J2kDecoderContext *s, J2kTile *tile) // read every packet of a tile: layer > reslevel > component > precinct
+{
+    int layno, reslevelno, compno, precno, ok_reslevel;
+    s->bit_index = 8; // start get_bits() on a fresh byte
+    for (layno = 0; layno < tile->codsty[0].nlayers; layno++){ // layer count is taken from component 0
+        ok_reslevel = 1;
+        for (reslevelno = 0; ok_reslevel; reslevelno++){ // stop once no component has this level
+            ok_reslevel = 0;
+            for (compno = 0; compno < s->ncomponents; compno++){
+                J2kCodingStyle *codsty = tile->codsty + compno;
+                J2kQuantStyle  *qntsty = tile->qntsty + compno;
+                if (reslevelno < codsty->nreslevels){
+                    J2kResLevel *rlevel = tile->comp[compno].reslevel + reslevelno;
+                    ok_reslevel = 1;
+                    for (precno = 0; precno < rlevel->num_precincts_x * rlevel->num_precincts_y; precno++){
+                        if (decode_packet(s, codsty, rlevel, precno, layno, qntsty->expn +
+                                          (reslevelno ? 3*(reslevelno-1)+1 : 0), qntsty->nguardbits)) // expn offset: 1 entry for level 0, then 3 per level
+                            return -1; // the specific error code is not passed on
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/* TIER-1 routines */
+static void decode_sigpass(J2kT1Context *t1, int width, int height, int bpno, int bandno) // significance pass of one bit plane
+{
+    int mask = 3 << (bpno - 1), y0, x, y; // magnitude given to a newly significant sample; NOTE(review): bpno < 1 is an invalid shift
+
+    for (y0 = 0; y0 < height; y0 += 4) // stripes of 4 rows, scanned column by column
+        for (x = 0; x < width; x++)
+            for (y = y0; y < height && y < y0+4; y++){
+                if ((t1->flags[y+1][x+1] & J2K_T1_SIG_NB) // flags[][] is offset by one in both directions
+                && !(t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS))){
+                    if (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno))){
+                        int xorbit, ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
+
+                        t1->data[y][x] = (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) ? -mask : mask;
+
+                        ff_j2k_set_significant(t1, x, y, t1->data[y][x] < 0);
+                    }
+                    t1->flags[y+1][x+1] |= J2K_T1_VIS; // handled in this bit plane; decode_clnpass() clears it again
+                }
+            }
+}
+
+static void decode_refpass(J2kT1Context *t1, int width, int height, int bpno) // refinement pass of one bit plane
+{
+    int phalf, nhalf;
+    int y0, x, y;
+
+    phalf = 1 << (bpno - 1); // NOTE(review): bpno < 1 is an invalid shift
+    nhalf = -phalf;
+
+    for (y0 = 0; y0 < height; y0 += 4) // stripes of 4 rows, scanned column by column
+        for (x = 0; x < width; x++)
+            for (y = y0; y < height && y < y0+4; y++){
+                if ((t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS)) == J2K_T1_SIG){ // SIG set, VIS clear
+                    int ctxno = ff_j2k_getrefctxno(t1->flags[y+1][x+1]);
+                    int r = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ? phalf : nhalf;
+                    t1->data[y][x] += t1->data[y][x] < 0 ? -r : r; // grow or shrink the magnitude, keeping the sign
+                    t1->flags[y+1][x+1] |= J2K_T1_REF;
+                }
+            }
+}
+
+static void decode_clnpass(J2kDecoderContext *s, J2kT1Context *t1, int width, int height,
+                           int bpno, int bandno, int seg_symbols) // cleanup pass of one bit plane
+{
+    int mask = 3 << (bpno - 1), y0, x, y, runlen, dec; // NOTE(review): bpno < 1 is an invalid shift
+
+    for (y0 = 0; y0 < height; y0 += 4) { // stripes of 4 rows, scanned column by column
+        for (x = 0; x < width; x++){
+            if (y0 + 3 < height && !( // full 4-row column with none of the three flags set: run-length mode
+            (t1->flags[y0+1][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) ||
+            (t1->flags[y0+2][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) ||
+            (t1->flags[y0+3][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) ||
+            (t1->flags[y0+4][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)))){
+                if (!ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_RL))
+                    continue; // nothing in this column becomes significant
+                runlen = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI); // 2-bit row offset of the first significant sample
+                runlen = (runlen << 1) | ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI);
+                dec = 1; // that sample is significant without a further decision
+            } else{
+                runlen = 0;
+                dec = 0;
+            }
+
+            for (y = y0 + runlen; y < y0 + 4 && y < height; y++){
+                if (!dec){
+                    if (!(t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS)))
+                        dec = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno));
+                }
+                if (dec){ // sample becomes significant: decode its sign
+                    int xorbit, ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
+                    t1->data[y][x] = (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) ? -mask : mask;
+                    ff_j2k_set_significant(t1, x, y, t1->data[y][x] < 0);
+                }
+                dec = 0;
+                t1->flags[y+1][x+1] &= ~J2K_T1_VIS; // reset for the next bit plane
+            }
+        }
+    }
+    if (seg_symbols) { // four bits expected to read 1010; a mismatch is only logged
+        int val;
+        val = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI);
+        val = (val << 1) + ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI);
+        val = (val << 1) + ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI);
+        val = (val << 1) + ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI);
+        if (val != 0xa) {
+            av_log(s->avctx, AV_LOG_ERROR,"Segmentation symbol value incorrect\n");
+        }
+    }
+}
+
+static int decode_cblk(J2kDecoderContext *s, J2kCodingStyle *codsty, J2kT1Context *t1, J2kCblk *cblk,
+                       int width, int height, int bandpos) // run all coding passes of one code block into t1->data; always returns 0
+{
+    int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1, y; // pass_t 2: the first pass is a cleanup pass
+
+    for (y = 0; y < height+2; y++)
+        memset(t1->flags[y], 0, (width+2)*sizeof(int)); // flags carry a one-entry border on every side
+
+    for (y = 0; y < height; y++)
+        memset(t1->data[y], 0, width*sizeof(int));
+
+    cblk->data[cblk->length] = 0xff;   // terminate the payload before the decoder is set up, so that
+    cblk->data[cblk->length+1] = 0xff; // initialisation (which presumably reads ahead) never sees stale bytes
+    ff_mqc_initdec(&t1->mqc, cblk->data);
+
+    while(passno--){
+        switch(pass_t){ // passes cycle sig -> ref -> cleanup within a bit plane
+            case 0: decode_sigpass(t1, width, height, bpno+1, bandpos);
+                    break;
+            case 1: decode_refpass(t1, width, height, bpno+1);
+                    break;
+            case 2: decode_clnpass(s, t1, width, height, bpno+1, bandpos,
+                                   codsty->cblk_style & J2K_CBLK_SEGSYM);
+                    break;
+        }
+
+        pass_t++;
+        if (pass_t == 3){ // cleanup done: move to the next lower bit plane
+            bpno--;
+            pass_t = 0;
+        }
+    }
+    return 0;
+}
+
+static void mct_decode(J2kDecoderContext *s, J2kTile *tile) // undo the multiple component transform on components 0..2, in place
+{
+    int i, *src[3], i0, i1, i2, csize = 1;
+
+    for (i = 0; i < 3; i++)
+        src[i] = tile->comp[i].data;
+
+    for (i = 0; i < 2; i++)
+        csize *= tile->comp[0].coord[i][1] - tile->comp[0].coord[i][0]; // sample count, taken from component 0
+
+    if (tile->codsty[0].transform == FF_DWT97){ // irreversible transform, 16.16 fixed-point coefficients
+        for (i = 0; i < csize; i++){
+            i0 = *src[0] + (*src[2] * 91881 >> 16); // 1.402: was 46802 (0.714), which belongs to the i1 term only
+            i1 = *src[0] - (*src[1] * 22553 + *src[2] * 46802 >> 16); // 0.34413 and 0.71414
+            i2 = *src[0] + (116130 * *src[1] >> 16); // 1.772
+            *src[0]++ = i0;
+            *src[1]++ = i1;
+            *src[2]++ = i2;
+        }
+    } else{ // reversible (integer) transform
+        for (i = 0; i < csize; i++){
+            i1 = *src[0] - (*src[2] + *src[1] >> 2);
+            i0 = i1 + *src[2];
+            i2 = i1 + *src[1];
+            *src[0]++ = i0;
+            *src[1]++ = i1;
+            *src[2]++ = i2;
+        }
+    }
+}
+/** Decode every code-block of a tile, dequantize, run the inverse DWT (and MCT if enabled) and store the samples in s->picture; always returns 0. */
+static int decode_tile(J2kDecoderContext *s, J2kTile *tile)
+{
+    int compno, reslevelno, bandno;
+    int x, y, *src[4];
+    uint8_t *line;
+    J2kT1Context t1;
+
+    for (compno = 0; compno < s->ncomponents; compno++){
+        J2kComponent *comp = tile->comp + compno;
+        J2kCodingStyle *codsty = tile->codsty + compno;
+
+        for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){
+            J2kResLevel *rlevel = comp->reslevel + reslevelno;
+            for (bandno = 0; bandno < rlevel->nbands; bandno++){
+                J2kBand *band = rlevel->band + bandno;
+                int cblkx, cblky, cblkno=0, xx0, x0, xx1, y0, yy0, yy1, bandpos;
+
+                bandpos = bandno + (reslevelno > 0);
+
+                yy0 = bandno == 0 ? 0 : comp->reslevel[reslevelno-1].coord[1][1] - comp->reslevel[reslevelno-1].coord[1][0];
+                y0 = yy0;
+                yy1 = FFMIN(ff_j2k_ceildiv(band->coord[1][0] + 1, band->codeblock_height) * band->codeblock_height,
+                            band->coord[1][1]) - band->coord[1][0] + yy0;
+
+                if (band->coord[0][0] == band->coord[0][1] || band->coord[1][0] == band->coord[1][1])
+                    continue; // empty band
+
+                for (cblky = 0; cblky < band->cblkny; cblky++){
+                    if (reslevelno == 0 || bandno == 1)
+                        xx0 = 0;
+                    else
+                        xx0 = comp->reslevel[reslevelno-1].coord[0][1] - comp->reslevel[reslevelno-1].coord[0][0];
+                    x0 = xx0;
+                    xx1 = FFMIN(ff_j2k_ceildiv(band->coord[0][0] + 1, band->codeblock_width) * band->codeblock_width,
+                                band->coord[0][1]) - band->coord[0][0] + xx0;
+
+                    for (cblkx = 0; cblkx < band->cblknx; cblkx++, cblkno++){
+                        int y, x;
+                        decode_cblk(s, codsty, &t1, band->cblk + cblkno, xx1 - xx0, yy1 - yy0, bandpos);
+                        if (codsty->transform == FF_DWT53){
+                            for (y = yy0; y < yy1; y+=s->cdy[compno]){
+                                int *ptr = t1.data[y-yy0];
+                                for (x = xx0; x < xx1; x+=s->cdx[compno]){
+                                    comp->data[(comp->coord[0][1] - comp->coord[0][0]) * y + x] = *ptr++ >> 1;
+                                }
+                            }
+                        } else{
+                            for (y = yy0; y < yy1; y+=s->cdy[compno]){
+                                int *ptr = t1.data[y-yy0];
+                                for (x = xx0; x < xx1; x+=s->cdx[compno]){
+                                    int tmp = ((int64_t)*ptr++) * ((int64_t)band->stepsize) >> 13, tmp2;
+                                    tmp2 = FFABS(tmp>>1) + FFABS(tmp&1);
+                                    comp->data[(comp->coord[0][1] - comp->coord[0][0]) * y + x] = tmp < 0 ? -tmp2 : tmp2;
+                                }
+                            }
+                        }
+                        xx0 = xx1; // advance to the next code-block in this row
+                        xx1 = FFMIN(xx1 + band->codeblock_width, band->coord[0][1] - band->coord[0][0] + x0);
+                    }
+                    yy0 = yy1; // advance to the next row of code-blocks
+                    yy1 = FFMIN(yy1 + band->codeblock_height, band->coord[1][1] - band->coord[1][0] + y0);
+                }
+            }
+        }
+        ff_j2k_dwt_decode(&comp->dwt, comp->data);
+        src[compno] = comp->data;
+    }
+    if (tile->codsty[0].mct)
+        mct_decode(s, tile);
+
+    if (s->avctx->pix_fmt == PIX_FMT_BGRA) // RGBA -> BGRA
+        FFSWAP(int *, src[0], src[2]);
+
+    if (s->precision <= 8) {
+        for (compno = 0; compno < s->ncomponents; compno++){
+            y = tile->comp[compno].coord[1][0] - s->image_offset_y;
+            line = s->picture.data[0] + y * s->picture.linesize[0];
+            for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y += s->cdy[compno]){
+                uint8_t *dst;
+
+                x = tile->comp[compno].coord[0][0] - s->image_offset_x;
+                dst = line + x * s->ncomponents + compno;
+
+                for (; x < tile->comp[compno].coord[0][1] - s->image_offset_x; x += s->cdx[compno]) {
+                    *src[compno] += 1 << (s->cbps[compno]-1); // add half the sample range, then clamp to [0, 2^cbps - 1]
+                    if (*src[compno] < 0)
+                        *src[compno] = 0;
+                    else if (*src[compno] >= (1 << s->cbps[compno]))
+                        *src[compno] = (1 << s->cbps[compno]) - 1;
+                    *dst = *src[compno]++;
+                    dst += s->ncomponents;
+                }
+                line += s->picture.linesize[0];
+            }
+        }
+    } else {
+        for (compno = 0; compno < s->ncomponents; compno++) {
+            y = tile->comp[compno].coord[1][0] - s->image_offset_y;
+            line = s->picture.data[0] + y * s->picture.linesize[0];
+            for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y += s->cdy[compno]) {
+                uint16_t *dst;
+                x = tile->comp[compno].coord[0][0] - s->image_offset_x;
+                dst = (uint16_t *)(line + (x * s->ncomponents + compno) * 2); // byte offset into the row, accessed as 16-bit samples
+                for (; x < tile->comp[compno].coord[0][1] - s->image_offset_x; x += s-> cdx[compno]) {
+                    int32_t val;
+                    val = *src[compno]++ << (16 - s->cbps[compno]);
+                    val += 1 << 15;
+                    val = av_clip(val, 0, (1 << 16) - 1);
+                    *dst = val;
+                    dst += s->ncomponents;
+                }
+                line += s->picture.linesize[0];
+            }
+        }
+    }
+    return 0;
+}
+/** Release the per-tile state: every tile-component via ff_j2k_cleanup(), each tile's comp array, then the tile array itself. */
+static void cleanup(J2kDecoderContext *s)
+{
+    int tileno, compno;
+    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
+        for (compno = 0; compno < s->ncomponents; compno++){
+            J2kComponent *comp = s->tile[tileno].comp + compno;
+            J2kCodingStyle *codsty = s->tile[tileno].codsty + compno;
+
+            ff_j2k_cleanup(comp, codsty);
+        }
+        av_freep(&s->tile[tileno].comp);
+    }
+    av_freep(&s->tile);
+}
+/** Parse marker segments until EOC (or the data runs out): header markers update the coding parameters, SOD decodes the current tile's packets; returns 0 or an error code. */
+static int decode_codestream(J2kDecoderContext *s)
+{
+    J2kCodingStyle *codsty = s->codsty;
+    J2kQuantStyle  *qntsty = s->qntsty;
+    uint8_t *properties = s->properties;
+
+    for (;;){
+        int marker, len, ret = 0;
+        uint8_t *oldbuf;
+        if (s->buf_end - s->buf < 2){
+            av_log(s->avctx, AV_LOG_ERROR, "Missing EOC\n");
+            break;
+        }
+
+        marker = bytestream_get_be16(&s->buf);
+        oldbuf = s->buf;
+        if (marker == J2K_SOD){
+            if (s->curtileno < 0) // SOD before any SOT: no tile has been selected yet
+                return AVERROR(EINVAL);
+            if ((ret = init_tile(s, s->curtileno)))
+                return ret;
+            if ((ret = decode_packets(s, s->tile + s->curtileno)))
+                return ret;
+            continue;
+        }
+        if (marker == J2K_EOC)
+            break;
+
+        if (s->buf_end - s->buf < 2)
+            return AVERROR(EINVAL);
+        len = bytestream_get_be16(&s->buf);
+        switch(marker){
+            case J2K_SIZ:
+                ret = get_siz(s); break;
+            case J2K_COC:
+                ret = get_coc(s, codsty, properties); break;
+            case J2K_COD:
+                ret = get_cod(s, codsty, properties); break;
+            case J2K_QCC:
+                ret = get_qcc(s, len, qntsty, properties); break;
+            case J2K_QCD:
+                ret = get_qcd(s, len, qntsty, properties); break;
+            case J2K_SOT:
+                if (!(ret = get_sot(s))){ // from here on the markers modify the selected tile's own parameters
+                    codsty = s->tile[s->curtileno].codsty;
+                    qntsty = s->tile[s->curtileno].qntsty;
+                    properties = s->tile[s->curtileno].properties;
+                }
+                break;
+            case J2K_COM:
+                // the comment is ignored
+                s->buf += len - 2; break;
+            default:
+                av_log(s->avctx, AV_LOG_ERROR, "unsupported marker 0x%.4X at pos 0x%x\n", marker, (int)(s->buf - s->buf_start - 4));
+                s->buf += len - 2; break;
+        }
+        if (s->buf - oldbuf != len || ret){ // the handler must have consumed exactly the announced segment length
+            av_log(s->avctx, AV_LOG_ERROR, "error during processing marker segment %.4x\n", marker);
+            return ret ? ret : -1;
+        }
+    }
+    return 0;
+}
+/** Skip the JP2 signature atom and walk the following atoms; returns 1 with s->buf just past the codestream atom's 8-byte header, or 0 if none is found. */
+static int jp2_find_codestream(J2kDecoderContext *s)
+{
+    int search_range = 10; // give up after this many atoms
+
+    if (s->buf_end - s->buf < 12)
+        return 0;
+    s->buf += 12; // skip jpeg2k signature atom
+
+    while (search_range-- && s->buf_end - s->buf >= 8) {
+        uint32_t atom_size = AV_RB32(s->buf);
+
+        if (AV_RB32(s->buf + 4) == JP2_CODESTREAM) {
+            s->buf += 8;
+            return 1;
+        }
+        // an atom that is smaller than its header or runs past the packet cannot be skipped
+        if (atom_size < 8 || atom_size > s->buf_end - s->buf)
+            break;
+        s->buf += atom_size;
+    }
+
+    return 0;
+}
+/** Decode one packet (raw codestream or JP2 file) into s->picture and hand it out through data; returns the number of bytes consumed or an error code. */
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        AVPacket *avpkt)
+{
+    J2kDecoderContext *s = avctx->priv_data;
+    AVFrame *picture = data;
+    int tileno, ret;
+
+    s->avctx = avctx;
+    av_log(s->avctx, AV_LOG_DEBUG, "start\n");
+
+    // init
+    s->buf = s->buf_start = avpkt->data;
+    s->buf_end = s->buf_start + avpkt->size;
+    s->curtileno = -1;
+
+    ff_j2k_init_tier1_luts();
+
+    if (s->buf_end - s->buf < 2)
+        return AVERROR(EINVAL);
+
+    // check if the image is in jp2 format; the test reads 12 bytes, so make sure they exist
+    if (s->buf_end - s->buf >= 12 && AV_RB32(s->buf) == 12 &&
+        AV_RB32(s->buf + 4) == JP2_SIG_TYPE && AV_RB32(s->buf + 8) == JP2_SIG_VALUE) {
+        if(!jp2_find_codestream(s)) {
+            av_log(avctx, AV_LOG_ERROR, "couldn't find jpeg2k codestream atom\n");
+            return -1;
+        }
+    }
+
+    if (s->buf_end - s->buf < 2 || bytestream_get_be16(&s->buf) != J2K_SOC){
+        av_log(avctx, AV_LOG_ERROR, "SOC marker not present\n");
+        return -1;
+    }
+    if ((ret = decode_codestream(s)))
+        return ret; // NOTE(review): this and the next error return skip cleanup(s)
+
+    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++)
+        if ((ret = decode_tile(s, s->tile + tileno)))
+            return ret;
+
+    cleanup(s);
+    av_log(s->avctx, AV_LOG_DEBUG, "end\n");
+
+    *data_size = sizeof(AVPicture);
+    *picture = s->picture;
+
+    return s->buf - s->buf_start;
+}
+/** Decoder init: reset s->picture to frame defaults and publish it as avctx->coded_frame; always returns 0. */
+static av_cold int j2kdec_init(AVCodecContext *avctx)
+{
+    J2kDecoderContext *s = avctx->priv_data;
+
+    avcodec_get_frame_defaults((AVFrame*)&s->picture);
+    avctx->coded_frame = (AVFrame*)&s->picture;
+    return 0;
+}
+/** Decoder close: hand s->picture back through avctx->release_buffer() if it still holds data; always returns 0. */
+static av_cold int decode_end(AVCodecContext *avctx)
+{
+    J2kDecoderContext *s = avctx->priv_data;
+
+    if (s->picture.data[0])
+        avctx->release_buffer(avctx, &s->picture);
+
+    return 0;
+}
+/* Codec table entry for the decoder; initialized positionally, so the order below must follow the AVCodec field order. */
+AVCodec ff_jpeg2000_decoder = {
+    "j2k",
+    AVMEDIA_TYPE_VIDEO,
+    CODEC_ID_JPEG2000,
+    sizeof(J2kDecoderContext),
+    j2kdec_init,
+    NULL, // NOTE(review): presumably the unused encode callback slot; confirm against AVCodec
+    decode_end,
+    decode_frame,
+    0,
+    .pix_fmts =
+        (enum PixelFormat[]) {PIX_FMT_GRAY8, PIX_FMT_RGB24, -1}
+};
diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
new file mode 100644
index 0000000000..123b796a76
--- /dev/null
+++ b/libavcodec/j2kenc.c
@@ -0,0 +1,1045 @@
+/*
+ * JPEG2000 image encoder
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * JPEG2000 image encoder
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include <float.h>
+#include "avcodec.h"
+#include "bytestream.h"
+#include "j2k.h"
+#include "libavutil/common.h"
+
+#define NMSEDEC_BITS 7
+#define NMSEDEC_FRACBITS (NMSEDEC_BITS-1)
+#define WMSEDEC_SHIFT 13 ///< must be >= 13
+#define LAMBDA_SCALE (100000000LL << (WMSEDEC_SHIFT - 13))
+
+static int lut_nmsedec_ref [1<<NMSEDEC_BITS],
+           lut_nmsedec_ref0[1<<NMSEDEC_BITS],
+           lut_nmsedec_sig [1<<NMSEDEC_BITS],
+           lut_nmsedec_sig0[1<<NMSEDEC_BITS];
+
+static const int dwt_norms[2][4][10] = { // [dwt_type][band][rlevel] (multiplied by 10000)
+    {{10000, 19650, 41770,  84030, 169000, 338400,  676900, 1353000, 2706000, 5409000},
+     {20220, 39890, 83550, 170400, 342700, 686300, 1373000, 2746000, 5490000},
+     {20220, 39890, 83550, 170400, 342700, 686300, 1373000, 2746000, 5490000},
+     {20800, 38650, 83070, 171800, 347100, 695900, 1393000, 2786000, 5572000}},
+
+    {{10000, 15000, 27500, 53750, 106800, 213400, 426700, 853300, 1707000, 3413000},
+     {10380, 15920, 29190, 57030, 113300, 226400, 452500, 904800, 1809000},
+     {10380, 15920, 29190, 57030, 113300, 226400, 452500, 904800, 1809000},
+     { 7186,  9218, 15860, 30430,  60190, 120100, 240000, 479700,  959300}}
+};
+
+typedef struct {
+   J2kComponent *comp;
+} J2kTile;
+
+typedef struct {
+    AVCodecContext *avctx;
+    AVFrame *picture;
+
+    int width, height; ///< image width and height
+    uint8_t cbps[4]; ///< bits per sample in particular components
+    int chroma_shift[2];
+    uint8_t planar;
+    int ncomponents;
+    int tile_width, tile_height; ///< tile size
+    int numXtiles, numYtiles;
+
+    uint8_t *buf_start;
+    uint8_t *buf;
+    uint8_t *buf_end;
+    int bit_index;
+
+    int64_t lambda;
+
+    J2kCodingStyle codsty;
+    J2kQuantStyle  qntsty;
+
+    J2kTile *tile;
+} J2kEncoderContext;
+
+
+/* debug */
+#if 0
+#undef ifprintf
+#undef printf
+
+static void nspaces(FILE *fd, int n)
+{
+    while(n--) putc(' ', fd);
+}
+
+static void printv(int *tab, int l)
+{
+    int i;
+    for (i = 0; i < l; i++)
+        printf("%.3d ", tab[i]);
+    printf("\n");
+}
+
+static void printu(uint8_t *tab, int l)
+{
+    int i;
+    for (i = 0; i < l; i++)
+        printf("%.3hd ", tab[i]);
+    printf("\n");
+}
+
+static void printcomp(J2kComponent *comp)
+{
+    int i;
+    for (i = 0; i < comp->y1 - comp->y0; i++)
+        printv(comp->data + i * (comp->x1 - comp->x0), comp->x1 - comp->x0);
+}
+
+static void dump(J2kEncoderContext *s, FILE *fd)
+{
+    int tileno, compno, reslevelno, bandno, precno;
+    fprintf(fd, "XSiz = %d, YSiz = %d, tile_width = %d, tile_height = %d\n"
+                "numXtiles = %d, numYtiles = %d, ncomponents = %d\n"
+                "tiles:\n",
+            s->width, s->height, s->tile_width, s->tile_height,
+            s->numXtiles, s->numYtiles, s->ncomponents);
+    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
+        J2kTile *tile = s->tile + tileno;
+        nspaces(fd, 2);
+        fprintf(fd, "tile %d:\n", tileno);
+        for(compno = 0; compno < s->ncomponents; compno++){
+            J2kComponent *comp = tile->comp + compno;
+            nspaces(fd, 4);
+            fprintf(fd, "component %d:\n", compno);
+            nspaces(fd, 4);
+            fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d\n",
+                        comp->x0, comp->x1, comp->y0, comp->y1);
+            for(reslevelno = 0; reslevelno < s->nreslevels; reslevelno++){
+                J2kResLevel *reslevel = comp->reslevel + reslevelno;
+                nspaces(fd, 6);
+                fprintf(fd, "reslevel %d:\n", reslevelno);
+                nspaces(fd, 6);
+                fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d, nbands = %d\n",
+                        reslevel->x0, reslevel->x1, reslevel->y0,
+                        reslevel->y1, reslevel->nbands);
+                for(bandno = 0; bandno < reslevel->nbands; bandno++){
+                    J2kBand *band = reslevel->band + bandno;
+                    nspaces(fd, 8);
+                    fprintf(fd, "band %d:\n", bandno);
+                    nspaces(fd, 8);
+                    fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d,"
+                                "codeblock_width = %d, codeblock_height = %d cblknx = %d cblkny = %d\n",
+                                band->x0, band->x1,
+                                band->y0, band->y1,
+                                band->codeblock_width, band->codeblock_height,
+                                band->cblknx, band->cblkny);
+                    for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++){
+                        J2kPrec *prec = band->prec + precno;
+                        nspaces(fd, 10);
+                        fprintf(fd, "prec %d:\n", precno);
+                        nspaces(fd, 10);
+                        fprintf(fd, "xi0 = %d, xi1 = %d, yi0 = %d, yi1 = %d\n",
+                                     prec->xi0, prec->xi1, prec->yi0, prec->yi1);
+                    }
+                }
+            }
+        }
+    }
+}
+#endif
+
+/* bitstream routines */
+
+/** put n times val bit (val is 0 or 1); the byte after a 0xff byte only receives 7 bits, its top bit stays 0 */
+static void put_bits(J2kEncoderContext *s, int val, int n) // TODO: optimize
+{
+    while (n-- > 0){
+        if (s->bit_index == 8)
+        {
+            s->bit_index = *s->buf == 0xff; // current byte is full: start the next one at bit 1 if this one is 0xff, else at bit 0
+            *(++s->buf) = 0;
+        }
+        *s->buf |= val << (7 - s->bit_index++); // bits are filled from the most significant end
+    }
+}
+
+/** put n least significant bits of a number num, highest of those bits first */
+static void put_num(J2kEncoderContext *s, int num, int n)
+{
+    while(--n >= 0)
+        put_bits(s, (num >> n) & 1, 1);
+}
+
+/** flush the bitstream: if the current byte holds any bits, step past it so the next write starts on a fresh byte */
+static void j2k_flush(J2kEncoderContext *s)
+{
+    if (s->bit_index){
+        s->bit_index = 0;
+        s->buf++;
+    }
+}
+
+/* tag tree routines */
+
+/** code the value stored in node relative to threshold, emitting the not yet visited ancestors first (topmost one first) */
+static void tag_tree_code(J2kEncoderContext *s, J2kTgtNode *node, int threshold)
+{
+    J2kTgtNode *stack[30]; // NOTE(review): nothing here checks that the path to the root has fewer than 30 nodes
+    int sp = 1, curval = 0;
+    stack[0] = node;
+
+    node = node->parent;
+    while(node){ // collect ancestors up to the first already visited one, marking them visited
+        if (node->vis){
+            curval = node->val;
+            break;
+        }
+        node->vis++;
+        stack[sp++] = node;
+        node = node->parent;
+    }
+    while(--sp >= 0){ // walk back down towards the original node
+        if (stack[sp]->val >= threshold){
+            put_bits(s, 0, threshold - curval); // value not below threshold: only zero bits, then stop
+            break;
+        }
+        put_bits(s, 0, stack[sp]->val - curval); // (val - curval) zero bits followed by a one bit
+        put_bits(s, 1, 1);
+        curval = stack[sp]->val;
+    }
+}
+
+/** update the value in node: lower each ancestor's value to the node's value,
+ *  stopping at the first ancestor whose value is already not larger
+ */
+static void tag_tree_update(J2kTgtNode *node)
+{
+    while (node->parent){
+        if (node->parent->val <= node->val)
+            break;
+        node->parent->val = node->val;
+        node = node->parent;
+    }
+}
+/** Write the SIZ marker segment (40 + 3 * ncomponents bytes); returns 0, or -1 if the output buffer is too small. */
+static int put_siz(J2kEncoderContext *s)
+{
+    int i;
+
+    if (s->buf_end - s->buf < 40 + 3 * s->ncomponents)
+        return -1;
+
+    bytestream_put_be16(&s->buf, J2K_SIZ);
+    bytestream_put_be16(&s->buf, 38 + 3 * s->ncomponents); // Lsiz
+    bytestream_put_be16(&s->buf, 0); // Rsiz
+    bytestream_put_be32(&s->buf, s->width); // width
+    bytestream_put_be32(&s->buf, s->height); // height
+    bytestream_put_be32(&s->buf, 0); // X0Siz
+    bytestream_put_be32(&s->buf, 0); // Y0Siz
+
+    bytestream_put_be32(&s->buf, s->tile_width); // XTSiz
+    bytestream_put_be32(&s->buf, s->tile_height); // YTSiz
+    bytestream_put_be32(&s->buf, 0); // XT0Siz
+    bytestream_put_be32(&s->buf, 0); // YT0Siz
+    bytestream_put_be16(&s->buf, s->ncomponents); // CSiz
+
+    for (i = 0; i < s->ncomponents; i++){ // Ssiz_i XRsiz_i, YRsiz_i
+        bytestream_put_byte(&s->buf, 7); // constant, not derived from s->cbps[i]
+        bytestream_put_byte(&s->buf, i?1<<s->chroma_shift[0]:1); // component 0 is never subsampled
+        bytestream_put_byte(&s->buf, i?1<<s->chroma_shift[1]:1);
+    }
+    return 0;
+}
+/** Write the 14-byte COD marker segment from s->codsty; returns 0, or -1 if the output buffer is too small. */
+static int put_cod(J2kEncoderContext *s)
+{
+    J2kCodingStyle *codsty = &s->codsty;
+
+    if (s->buf_end - s->buf < 14)
+        return -1;
+
+    bytestream_put_be16(&s->buf, J2K_COD);
+    bytestream_put_be16(&s->buf, 12); // Lcod
+    bytestream_put_byte(&s->buf, 0);  // Scod
+    // SGcod
+    bytestream_put_byte(&s->buf, 0); // progression level
+    bytestream_put_be16(&s->buf, 1); // num of layers
+    bytestream_put_byte(&s->buf, 0); // multiple component transformation
+    // SPcod
+    bytestream_put_byte(&s->buf, codsty->nreslevels - 1); // num of decomp. levels
+    bytestream_put_byte(&s->buf, codsty->log2_cblk_width-2); // cblk width
+    bytestream_put_byte(&s->buf, codsty->log2_cblk_height-2); // cblk height
+    bytestream_put_byte(&s->buf, 0); // cblk style
+    bytestream_put_byte(&s->buf, codsty->transform); // transformation
+    return 0;
+}
+/** Write the QCD marker segment from s->qntsty (compno is not used); returns 0, or -1 if the output buffer is too small. */
+static int put_qcd(J2kEncoderContext *s, int compno)
+{
+    int i, size;
+    J2kCodingStyle *codsty = &s->codsty;
+    J2kQuantStyle  *qntsty = &s->qntsty;
+
+    if (qntsty->quantsty == J2K_QSTY_NONE)
+        size = 4 + 3 * (codsty->nreslevels-1); // one byte per band
+    else // QSTY_SE
+        size = 5 + 6 * (codsty->nreslevels-1); // two bytes per band
+
+    if (s->buf_end - s->buf < size + 2)
+        return -1;
+
+    bytestream_put_be16(&s->buf, J2K_QCD);
+    bytestream_put_be16(&s->buf, size);  // LQcd
+    bytestream_put_byte(&s->buf, (qntsty->nguardbits << 5) | qntsty->quantsty);  // Sqcd
+    if (qntsty->quantsty == J2K_QSTY_NONE)
+        for (i = 0; i < codsty->nreslevels * 3 - 2; i++) // 1 band at level 0 plus 3 per further level
+            bytestream_put_byte(&s->buf, qntsty->expn[i] << 3);
+    else // QSTY_SE
+        for (i = 0; i < codsty->nreslevels * 3 - 2; i++)
+            bytestream_put_be16(&s->buf, (qntsty->expn[i] << 11) | qntsty->mant[i]);
+    return 0;
+}
+/** Write the SOT marker segment for tile tileno; returns a pointer to its 4-byte Psot field (written as 0 here), or NULL if the buffer is too small. */
+static uint8_t *put_sot(J2kEncoderContext *s, int tileno)
+{
+    uint8_t *psotptr;
+
+    if (s->buf_end - s->buf < 12)
+        return NULL;
+
+    bytestream_put_be16(&s->buf, J2K_SOT);
+    bytestream_put_be16(&s->buf, 10); // Lsot
+    bytestream_put_be16(&s->buf, tileno); // Isot
+
+    psotptr = s->buf;
+    bytestream_put_be32(&s->buf, 0); // Psot (filled in later)
+
+    bytestream_put_byte(&s->buf, 0); // TPsot
+    bytestream_put_byte(&s->buf, 1); // TNsot
+    return psotptr;
+}
+
+/**
+ * compute the sizes of tiles, resolution levels, bands, etc.
+ * allocate memory for them
+ * divide the input image into tile-components
+ */
+static int init_tiles(J2kEncoderContext *s)
+{
+    int tileno, tilex, tiley, compno;
+    J2kCodingStyle *codsty = &s->codsty;
+    J2kQuantStyle  *qntsty = &s->qntsty;
+
+    s->numXtiles = ff_j2k_ceildiv(s->width, s->tile_width);
+    s->numYtiles = ff_j2k_ceildiv(s->height, s->tile_height);
+
+    s->tile = av_malloc(s->numXtiles * s->numYtiles * sizeof(J2kTile));
+    if (!s->tile)
+        return AVERROR(ENOMEM);
+    for (tileno = 0, tiley = 0; tiley < s->numYtiles; tiley++)
+        for (tilex = 0; tilex < s->numXtiles; tilex++, tileno++){ // tiles are numbered row by row
+            J2kTile *tile = s->tile + tileno;
+
+            tile->comp = av_malloc(s->ncomponents * sizeof(J2kComponent));
+            if (!tile->comp)
+                return AVERROR(ENOMEM); // NOTE(review): nothing allocated so far is released on this path
+            for (compno = 0; compno < s->ncomponents; compno++){
+                J2kComponent *comp = tile->comp + compno;
+                int ret, i, j;
+
+                comp->coord[0][0] = tilex * s->tile_width;
+                comp->coord[0][1] = FFMIN((tilex+1)*s->tile_width, s->width); // last column of tiles is clipped to the image
+                comp->coord[1][0] = tiley * s->tile_height;
+                comp->coord[1][1] = FFMIN((tiley+1)*s->tile_height, s->height);
+                if (compno > 0) // components after the first are scaled down by the chroma shift
+                    for (i = 0; i < 2; i++)
+                        for (j = 0; j < 2; j++)
+                            comp->coord[i][j] = ff_j2k_ceildivpow2(comp->coord[i][j], s->chroma_shift[i]);
+
+                if (ret = ff_j2k_init_component(comp, codsty, qntsty, s->cbps[compno]))
+                    return ret;
+            }
+        }
+    return 0;
+}
+/** Copy the 8-bit samples of s->picture into the data arrays of every tile-component, subtracting 1 << 7 from each sample. */
+static void copy_frame(J2kEncoderContext *s)
+{
+    int tileno, compno, i, y, x;
+    uint8_t *line;
+    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
+        J2kTile *tile = s->tile + tileno;
+        if (s->planar){ // one picture plane per component
+            for (compno = 0; compno < s->ncomponents; compno++){
+                J2kComponent *comp = tile->comp + compno;
+                int *dst = comp->data;
+                line = s->picture->data[compno]
+                       + comp->coord[1][0] * s->picture->linesize[compno]
+                       + comp->coord[0][0];
+                for (y = comp->coord[1][0]; y < comp->coord[1][1]; y++){
+                    uint8_t *ptr = line;
+                    for (x = comp->coord[0][0]; x < comp->coord[0][1]; x++)
+                        *dst++ = *ptr++ - (1 << 7);
+                    line += s->picture->linesize[compno];
+                }
+            }
+        } else{ // all components interleaved in plane 0, ncomponents bytes per pixel
+            line = s->picture->data[0] + tile->comp[0].coord[1][0] * s->picture->linesize[0]
+                   + tile->comp[0].coord[0][0] * s->ncomponents;
+
+            i = 0;
+            for (y = tile->comp[0].coord[1][0]; y < tile->comp[0].coord[1][1]; y++){
+                uint8_t *ptr = line;
+                for (x = tile->comp[0].coord[0][0]; x < tile->comp[0].coord[0][1]; x++, i++){ // component 0's coordinates are used for all components
+                    for (compno = 0; compno < s->ncomponents; compno++){
+                        tile->comp[compno].data[i] = *ptr++  - (1 << 7);
+                    }
+                }
+                line += s->picture->linesize[0];
+            }
+        }
+    }
+}
+/** Fill s->qntsty.expn[] and .mant[] for every band; the same entries are rewritten for each component, so the last component's values remain. */
+static void init_quantization(J2kEncoderContext *s)
+{
+    int compno, reslevelno, bandno;
+    J2kQuantStyle  *qntsty = &s->qntsty;
+    J2kCodingStyle *codsty = &s->codsty;
+
+    for (compno = 0; compno < s->ncomponents; compno++){
+        int gbandno = 0;
+        for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){
+            int nbands, lev = codsty->nreslevels - reslevelno - 1;
+            nbands = reslevelno ? 3 : 1;
+            for (bandno = 0; bandno < nbands; bandno++, gbandno++){
+                int expn, mant = 0; // mant is only computed for FF_DWT97; store 0 otherwise
+
+                if (codsty->transform == FF_DWT97){
+                    int bandpos = bandno + (reslevelno>0),
+                        ss = 81920000 / dwt_norms[0][bandpos][lev],
+                        log = av_log2(ss);
+                    mant = (11 - log < 0 ? ss >> log - 11 : ss << 11 - log) & 0x7ff; // keep 11 bits
+                    expn = s->cbps[compno] - log + 13;
+                } else
+                    expn = ((bandno&2)>>1) + (reslevelno>0) + s->cbps[compno];
+
+                qntsty->expn[gbandno] = expn;
+                qntsty->mant[gbandno] = mant;
+            }
+        }
+    }
+}
+/** Fill the four lut_nmsedec_* tables, 1 << NMSEDEC_BITS entries each; no entry is negative (FFMAX with 0). */
+static void init_luts(void)
+{
+    int i, a,
+        mask = ~((1<<NMSEDEC_FRACBITS)-1);
+
+    for (i = 0; i < (1 << NMSEDEC_BITS); i++){
+        lut_nmsedec_sig[i]  = FFMAX(6*i - (9<<NMSEDEC_FRACBITS-1) << 12-NMSEDEC_FRACBITS, 0);
+        lut_nmsedec_sig0[i] = FFMAX((i*i + (1<<NMSEDEC_FRACBITS-1) & mask) << 1, 0);
+
+        a = (i >> (NMSEDEC_BITS-2)&2) + 1;
+        lut_nmsedec_ref[i]  = FFMAX((-2*i + (1<<NMSEDEC_FRACBITS) + a*i - (a*a<<NMSEDEC_FRACBITS-2))
+                                    << 13-NMSEDEC_FRACBITS, 0);
+        lut_nmsedec_ref0[i] = FFMAX(((i*i + (1-4*i << NMSEDEC_FRACBITS-1) + (1<<2*NMSEDEC_FRACBITS)) & mask)
+                                    << 1, 0);
+    }
+}
+
+/* tier-1 routines */
+static int getnmsedec_sig(int x, int bpno) // table lookup on NMSEDEC_BITS bits of x
+{
+    if (bpno > NMSEDEC_FRACBITS) // index with the bits of x starting at bit (bpno - NMSEDEC_FRACBITS)
+        return lut_nmsedec_sig[(x >> (bpno - NMSEDEC_FRACBITS)) & ((1 << NMSEDEC_BITS) - 1)];
+    return lut_nmsedec_sig0[x & ((1 << NMSEDEC_BITS) - 1)]; // otherwise the lowest bits of x and the "0" table
+}
+/** Same lookup as getnmsedec_sig(), but in the lut_nmsedec_ref / lut_nmsedec_ref0 tables. */
+static int getnmsedec_ref(int x, int bpno)
+{
+    if (bpno > NMSEDEC_FRACBITS)
+        return lut_nmsedec_ref[(x >> (bpno - NMSEDEC_FRACBITS)) & ((1 << NMSEDEC_BITS) - 1)];
+    return lut_nmsedec_ref0[x & ((1 << NMSEDEC_BITS) - 1)];
+}
+/** Encode, for one bit-plane, the samples that are not yet J2K_T1_SIG but have J2K_T1_SIG_NB set; adds the distortion estimate of newly significant samples to *nmsedec. */
+static void encode_sigpass(J2kT1Context *t1, int width, int height, int bandno, int *nmsedec, int bpno)
+{
+    int y0, x, y, mask = 1 << (bpno + NMSEDEC_FRACBITS);
+    for (y0 = 0; y0 < height; y0 += 4) // scan in stripes of 4 rows, column by column
+        for (x = 0; x < width; x++)
+            for (y = y0; y < height && y < y0+4; y++){
+                if (!(t1->flags[y+1][x+1] & J2K_T1_SIG) && (t1->flags[y+1][x+1] & J2K_T1_SIG_NB)){ // flags carry a one-sample border, hence +1
+                    int ctxno = ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno),
+                        bit = t1->data[y][x] & mask ? 1 : 0;
+                    ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, bit);
+                    if (bit){ // sample turns significant: code its sign as well
+                        int xorbit;
+                        int ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
+                        ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit);
+                        *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
+                        ff_j2k_set_significant(t1, x, y, t1->flags[y+1][x+1] >> 15);
+                    }
+                    t1->flags[y+1][x+1] |= J2K_T1_VIS; // mark as handled in this bit-plane
+                }
+            }
+}
+/** Encode, for one bit-plane, the current bit of every sample that is J2K_T1_SIG but not J2K_T1_VIS; adds the distortion estimate to *nmsedec. */
+static void encode_refpass(J2kT1Context *t1, int width, int height, int *nmsedec, int bpno)
+{
+    int y0, x, y, mask = 1 << (bpno + NMSEDEC_FRACBITS);
+    for (y0 = 0; y0 < height; y0 += 4) // scan in stripes of 4 rows, column by column
+        for (x = 0; x < width; x++)
+            for (y = y0; y < height && y < y0+4; y++)
+                if ((t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS)) == J2K_T1_SIG){
+                    int ctxno = ff_j2k_getrefctxno(t1->flags[y+1][x+1]);
+                    *nmsedec += getnmsedec_ref(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
+                    ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0);
+                    t1->flags[y+1][x+1] |= J2K_T1_REF; // remember that this sample has been refined
+                }
+}
+/** Encode, for one bit-plane, every sample that is neither J2K_T1_SIG nor J2K_T1_VIS, run-length coding flag-free columns of 4; clears J2K_T1_VIS on all samples. */
+static void encode_clnpass(J2kT1Context *t1, int width, int height, int bandno, int *nmsedec, int bpno)
+{
+    int y0, x, y, mask = 1 << (bpno + NMSEDEC_FRACBITS);
+    for (y0 = 0; y0 < height; y0 += 4)
+        for (x = 0; x < width; x++){
+            if (y0 + 3 < height && !( // full 4-row column with none of the three flags set on any sample
+            (t1->flags[y0+1][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) ||
+            (t1->flags[y0+2][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) ||
+            (t1->flags[y0+3][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) ||
+            (t1->flags[y0+4][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG))))
+            {
+                // aggregation mode
+                int rlen;
+                for (rlen = 0; rlen < 4; rlen++) // rlen = index of the first sample with the current bit set, 4 if none
+                    if (t1->data[y0+rlen][x] & mask)
+                        break;
+                ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_RL, rlen != 4);
+                if (rlen == 4)
+                    continue;
+                ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI, rlen >> 1); // rlen as two bits, high bit first
+                ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI, rlen & 1);
+                for (y = y0 + rlen; y < y0 + 4; y++){
+                    if (!(t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS))){
+                        int ctxno = ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno);
+                        if (y > y0 + rlen) // the bit at y0 + rlen is already implied by the run length
+                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0);
+                        if (t1->data[y][x] & mask){ // newly significant
+                            int xorbit;
+                            int ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
+                            *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
+                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit);
+                            ff_j2k_set_significant(t1, x, y, t1->flags[y+1][x+1] >> 15);
+                        }
+                    }
+                    t1->flags[y+1][x+1] &= ~J2K_T1_VIS;
+                }
+            } else{ // sample by sample
+                for (y = y0; y < y0 + 4 && y < height; y++){
+                    if (!(t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS))){
+                        int ctxno = ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno);
+                        ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0);
+                        if (t1->data[y][x] & mask){ // newly significant
+                            int xorbit;
+                            int ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
+                            *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
+                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit);
+                            ff_j2k_set_significant(t1, x, y, t1->flags[y+1][x+1] >> 15);
+                        }
+                    }
+                    t1->flags[y+1][x+1] &= ~J2K_T1_VIS; // reset for the next bit-plane
+                }
+            }
+        }
+}
+
+static void encode_cblk(J2kEncoderContext *s, J2kT1Context *t1, J2kCblk *cblk, J2kTile *tile,
+                        int width, int height, int bandpos, int lev) /* tier-1 codes the width x height block held in t1->data into cblk->data and fills cblk->passes[], npasses, ninclpasses and nonzerobits; s, tile and lev are not used here */
+{
+    int pass_t = 2, passno, x, y, max=0, nmsedec, bpno; // pass_t starts at 2: the top bit-plane only gets a cleanup pass
+    int64_t wmsedec = 0;
+
+    for (y = 0; y < height+2; y++) // flags carries a one-sample border around the block
+        memset(t1->flags[y], 0, (width+2)*sizeof(int));
+
+    for (y = 0; y < height; y++){ // split into sign flag + magnitude and find the largest magnitude
+        for (x = 0; x < width; x++){
+            if (t1->data[y][x] < 0){
+                t1->flags[y+1][x+1] |= J2K_T1_SGN;
+                t1->data[y][x] = -t1->data[y][x];
+            }
+            max = FFMAX(max, t1->data[y][x]);
+        }
+    }
+
+    if (max == 0){
+        cblk->nonzerobits = 0;
+        bpno = 0;
+    } else{
+        cblk->nonzerobits = av_log2(max) + 1 - NMSEDEC_FRACBITS; // may be <= 0 when max has no bits above the fractional part
+        bpno = cblk->nonzerobits - 1;
+    }
+
+    ff_mqc_initenc(&t1->mqc, cblk->data);
+
+    for (passno = 0; bpno >= 0; passno++){ // pass order per bit-plane: significance (0), refinement (1), cleanup (2)
+        nmsedec=0;
+
+        switch(pass_t){
+            case 0: encode_sigpass(t1, width, height, bandpos, &nmsedec, bpno);
+                    break;
+            case 1: encode_refpass(t1, width, height, &nmsedec, bpno);
+                    break;
+            case 2: encode_clnpass(t1, width, height, bandpos, &nmsedec, bpno);
+                    break;
+        }
+
+        cblk->passes[passno].rate = 3 + ff_mqc_length(&t1->mqc); // bytes emitted so far plus 3 - presumably slack for bits still inside the coder; confirm
+        wmsedec += (int64_t)nmsedec << (2*bpno); // weight the distortion gain by 4^bpno
+        cblk->passes[passno].disto = wmsedec; // cumulative over passes
+
+        if (++pass_t == 3){
+            pass_t = 0;
+            bpno--;
+        }
+    }
+    cblk->npasses = passno;
+    cblk->ninclpasses = passno;
+
+    if (passno) // no pass was coded when bpno started negative: passes[-1] must not be written (TODO: optional flush on each pass)
+        cblk->passes[passno-1].rate = ff_mqc_flush(&t1->mqc);
+}
+
+/* tier-2 routines: */
+
+static void putnumpasses(J2kEncoderContext *s, int n) /* writes the variable-length code for the number of coding passes n */
+{
+    if (n > 36)
+        put_num(s, 0xff80 | (n-37), 16); // 16-bit form
+    else if (n > 5)
+        put_num(s, 0x1e0 | (n-6), 9); // 9-bit form for 6..36
+    else if (n == 2)
+        put_num(s, 2, 2);
+    else if (n == 1)
+        put_num(s, 0, 1);
+    else
+        put_num(s, 0xc | (n-3), 4); // 4-bit form for every remaining n <= 5
+}
+
+
+static int encode_packet(J2kEncoderContext *s, J2kResLevel *rlevel, int precno,
+                          uint8_t *expn, int numgbits) /* writes one packet (header, then code-block bytes) for precinct precno of rlevel at s->buf; returns 0 on success, -1 when the output buffer is too small */
+{
+    int bandno, empty = 1;
+
+    // init bitstream
+    *s->buf = 0;
+    s->bit_index = 0;
+
+    // header
+
+    // is the packet empty?
+    for (bandno = 0; bandno < rlevel->nbands; bandno++){ // non-empty as soon as one band has a non-zero area
+        if (rlevel->band[bandno].coord[0][0] < rlevel->band[bandno].coord[0][1]
+        &&  rlevel->band[bandno].coord[1][0] < rlevel->band[bandno].coord[1][1]){
+            empty = 0;
+            break;
+        }
+    }
+
+    put_bits(s, !empty, 1); // first header bit: 1 when the packet carries data
+    if (empty){
+        j2k_flush(s);
+        return 0;
+    }
+
+    for (bandno = 0; bandno < rlevel->nbands; bandno++){
+        J2kBand *band = rlevel->band + bandno;
+        J2kPrec *prec = band->prec + precno;
+        int yi, xi, pos;
+        int cblknw = prec->xi1 - prec->xi0; // code blocks per precinct row, also used as the row stride into band->cblk
+
+        if (band->coord[0][0] == band->coord[0][1]
+        ||  band->coord[1][0] == band->coord[1][1])
+            continue;
+
+        for (pos=0, yi = prec->yi0; yi < prec->yi1; yi++){ // load the tag-tree leaves before any of them is coded
+            for (xi = prec->xi0; xi < prec->xi1; xi++, pos++){
+                prec->cblkincl[pos].val = band->cblk[yi * cblknw + xi].ninclpasses == 0;
+                tag_tree_update(prec->cblkincl + pos);
+                prec->zerobits[pos].val = expn[bandno] + numgbits - 1 - band->cblk[yi * cblknw + xi].nonzerobits;
+                tag_tree_update(prec->zerobits + pos);
+            }
+        }
+
+        for (pos=0, yi = prec->yi0; yi < prec->yi1; yi++){
+            for (xi = prec->xi0; xi < prec->xi1; xi++, pos++){
+                int pad = 0, llen, length;
+                J2kCblk *cblk = band->cblk + yi * cblknw + xi;
+
+                if (s->buf_end - s->buf < 20) // approximately
+                    return -1;
+
+                // inclusion information
+                tag_tree_code(s, prec->cblkincl + pos, 1);
+                if (!cblk->ninclpasses)
+                    continue;
+                // zerobits information
+                tag_tree_code(s, prec->zerobits + pos, 100);
+                // number of passes
+                putnumpasses(s, cblk->ninclpasses);
+
+                length = cblk->passes[cblk->ninclpasses-1].rate; // byte count up to the last included pass
+                llen = av_log2(length) - av_log2(cblk->ninclpasses) - 2;
+                if (llen < 0){ // the length field cannot shrink: pad the number with leading bits instead
+                    pad = -llen;
+                    llen = 0;
+                }
+                // length of code block
+                put_bits(s, 1, llen);
+                put_bits(s, 0, 1);
+                put_num(s, length, av_log2(length)+1+pad);
+            }
+        }
+    }
+    j2k_flush(s);
+    for (bandno = 0; bandno < rlevel->nbands; bandno++){ // packet body: code-block bytes in the same band/row/column order as the header
+        J2kBand *band = rlevel->band + bandno;
+        J2kPrec *prec = band->prec + precno;
+        int yi, cblknw = prec->xi1 - prec->xi0;
+        for (yi = prec->yi0; yi < prec->yi1; yi++){
+            int xi;
+            for (xi = prec->xi0; xi < prec->xi1; xi++){
+                J2kCblk *cblk = band->cblk + yi * cblknw + xi;
+                if (cblk->ninclpasses){
+                    if (s->buf_end - s->buf < cblk->passes[cblk->ninclpasses-1].rate)
+                        return -1;
+                    bytestream_put_buffer(&s->buf, cblk->data, cblk->passes[cblk->ninclpasses-1].rate);
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+static int encode_packets(J2kEncoderContext *s, J2kTile *tile, int tileno) /* tier-2: writes every packet of tile tileno; returns 0 or the first non-zero encode_packet() result */
+{
+    int comp_idx, res_idx, err;
+    J2kCodingStyle *cs = &s->codsty;
+    J2kQuantStyle  *qs = &s->qntsty;
+
+    av_log(s->avctx, AV_LOG_DEBUG, "tier2\n");
+    // packet order: resolution level, then component, then precinct
+    for (res_idx = 0; res_idx < cs->nreslevels; res_idx++){
+        for (comp_idx = 0; comp_idx < s->ncomponents; comp_idx++){
+            int prec_idx;
+            J2kResLevel *rl = &s->tile[tileno].comp[comp_idx].reslevel[res_idx];
+            for (prec_idx = 0; prec_idx < rl->num_precincts_x * rl->num_precincts_y; prec_idx++){
+                err = encode_packet(s, rl, prec_idx, qs->expn + (res_idx ? 3*res_idx-2 : 0), qs->nguardbits); // level 0 uses expn[0]; level r > 0 starts at expn[3r-2]
+                if (err)
+                    return err;
+            }
+        }
+    }
+    av_log(s->avctx, AV_LOG_DEBUG, "after tier2\n");
+    return 0;
+}
+
+static int getcut(J2kCblk *cblk, int64_t lambda, int dwt_norm) /* returns how many leading passes of cblk to keep for the rate/distortion trade-off lambda */
+{
+    int i, cut = 0;
+    for (i = 0; i < cblk->npasses; i++){
+        int     rate_gain = cblk->passes[i].rate;  // measured against the last accepted pass
+        int64_t dist_gain = cblk->passes[i].disto;
+
+        if (cut){
+            rate_gain -= cblk->passes[cut-1].rate;
+            dist_gain -= cblk->passes[cut-1].disto;
+        }
+
+        if (((dist_gain * dwt_norm) >> WMSEDEC_SHIFT) * dwt_norm >= rate_gain * lambda)
+            cut = i+1; // keep everything up to and including pass i
+    }
+    return cut;
+}
+
+static void truncpasses(J2kEncoderContext *s, J2kTile *tile) /* rate control: sets ninclpasses of every code block in tile from getcut() and s->lambda */
+{
+    int compno, reslevelno, bandno, cblkno, lev;
+    J2kCodingStyle *codsty = &s->codsty;
+
+    for (compno = 0; compno < s->ncomponents; compno++){
+        J2kComponent *comp = tile->comp + compno;
+
+        for (reslevelno = 0, lev = codsty->nreslevels-1; reslevelno < codsty->nreslevels; reslevelno++, lev--){ // lev counts down while reslevelno counts up
+            J2kResLevel *reslevel = comp->reslevel + reslevelno;
+
+            for (bandno = 0; bandno < reslevel->nbands ; bandno++){
+                int bandpos = bandno + (reslevelno > 0); // same band index offset as in encode_tile
+                J2kBand *band = reslevel->band + bandno;
+
+                for (cblkno = 0; cblkno < band->cblknx * band->cblkny; cblkno++){
+                    J2kCblk *cblk = band->cblk + cblkno;
+
+                    cblk->ninclpasses = getcut(cblk, s->lambda,
+                            (int64_t)dwt_norms[codsty->transform][bandpos][lev] * (int64_t)band->stepsize >> 13); // distortion weight: table norm scaled by the band step size
+                }
+            }
+        }
+    }
+}
+
+static int encode_tile(J2kEncoderContext *s, J2kTile *tile, int tileno) /* encodes one tile: DWT per component, tier-1 coding of every code block, then rate control and packet output; returns 0 or the first non-zero result of a failing stage */
+{
+    int compno, reslevelno, bandno, ret;
+    J2kT1Context t1;
+    J2kCodingStyle *codsty = &s->codsty;
+    for (compno = 0; compno < s->ncomponents; compno++){
+        J2kComponent *comp = s->tile[tileno].comp + compno;
+
+        av_log(s->avctx, AV_LOG_DEBUG,"dwt\n");
+        if (ret = ff_dwt_encode(&comp->dwt, comp->data))
+            return ret;
+        av_log(s->avctx, AV_LOG_DEBUG,"after dwt -> tier1\n");
+
+        for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){
+            J2kResLevel *reslevel = comp->reslevel + reslevelno;
+
+            for (bandno = 0; bandno < reslevel->nbands ; bandno++){
+                J2kBand *band = reslevel->band + bandno;
+                int cblkx, cblky, cblkno=0, xx0, x0, xx1, y0, yy0, yy1, bandpos;
+                yy0 = bandno == 0 ? 0 : comp->reslevel[reslevelno-1].coord[1][1] - comp->reslevel[reslevelno-1].coord[1][0]; // row offset of this band inside comp->data
+                y0 = yy0;
+                yy1 = FFMIN(ff_j2k_ceildiv(band->coord[1][0] + 1, band->codeblock_height) * band->codeblock_height,
+                            band->coord[1][1]) - band->coord[1][0] + yy0; // end of the first code-block row, clipped to the band
+
+                if (band->coord[0][0] == band->coord[0][1] || band->coord[1][0] == band->coord[1][1])
+                    continue;
+
+                bandpos = bandno + (reslevelno > 0);
+
+                for (cblky = 0; cblky < band->cblkny; cblky++){
+                    if (reslevelno == 0 || bandno == 1)
+                        xx0 = 0;
+                    else
+                        xx0 = comp->reslevel[reslevelno-1].coord[0][1] - comp->reslevel[reslevelno-1].coord[0][0]; // column offset of this band inside comp->data
+                    x0 = xx0;
+                    xx1 = FFMIN(ff_j2k_ceildiv(band->coord[0][0] + 1, band->codeblock_width) * band->codeblock_width,
+                                band->coord[0][1]) - band->coord[0][0] + xx0;
+
+                    for (cblkx = 0; cblkx < band->cblknx; cblkx++, cblkno++){
+                        int y, x;
+                        if (codsty->transform == FF_DWT53){
+                            for (y = yy0; y < yy1; y++){
+                                int *ptr = t1.data[y-yy0];
+                                for (x = xx0; x < xx1; x++){
+                                    *ptr++ = comp->data[(comp->coord[0][1] - comp->coord[0][0]) * y + x] << NMSEDEC_FRACBITS;
+                                }
+                            }
+                        } else{
+                            for (y = yy0; y < yy1; y++){
+                                int *ptr = t1.data[y-yy0];
+                                for (x = xx0; x < xx1; x++){
+                                    int coef = comp->data[(comp->coord[0][1] - comp->coord[0][0]) * y + x]; // temporary: reading *ptr while post-incrementing ptr in one expression is undefined behaviour
+                                    *ptr++ = (int64_t)coef * (int64_t)(8192 * 8192 / band->stepsize) >> (13 - NMSEDEC_FRACBITS);
+                                }
+                            }
+                        }
+                        encode_cblk(s, &t1, band->cblk + cblkno, tile, xx1 - xx0, yy1 - yy0,
+                                    bandpos, codsty->nreslevels - reslevelno - 1);
+                        xx0 = xx1;
+                        xx1 = FFMIN(xx1 + band->codeblock_width, band->coord[0][1] - band->coord[0][0] + x0);
+                    }
+                    yy0 = yy1;
+                    yy1 = FFMIN(yy1 + band->codeblock_height, band->coord[1][1] - band->coord[1][0] + y0);
+                }
+            }
+        }
+        av_log(s->avctx, AV_LOG_DEBUG, "after tier1\n");
+    }
+
+    av_log(s->avctx, AV_LOG_DEBUG, "rate control\n");
+    truncpasses(s, tile);
+    if (ret = encode_packets(s, tile, tileno))
+        return ret;
+    av_log(s->avctx, AV_LOG_DEBUG, "after rate control\n");
+    return 0;
+}
+
+void cleanup(J2kEncoderContext *s) /* releases every component of every tile, then the tile array itself */
+{
+    int t, c;
+    J2kCodingStyle *codsty = &s->codsty;
+
+    for (t = 0; t < s->numXtiles * s->numYtiles; t++){
+        J2kTile *tile = s->tile + t;
+
+        for (c = 0; c < s->ncomponents; c++)
+            ff_j2k_cleanup(tile->comp + c, codsty);
+        av_freep(&tile->comp);
+    }
+    av_freep(&s->tile);
+}
+
+static void reinit(J2kEncoderContext *s) /* calls ff_j2k_reinit() on every component of every tile; encode_frame runs it before each frame */
+{
+    int ntiles = s->numXtiles * s->numYtiles, t, c;
+    for (t = 0; t < ntiles; t++){
+        J2kComponent *comp = s->tile[t].comp;
+        for (c = 0; c < s->ncomponents; c++)
+            ff_j2k_reinit(&comp[c], &s->codsty);
+    }
+}
+
+static int encode_frame(AVCodecContext *avctx,
+                        uint8_t *buf, int buf_size,
+                        void *data) /* encodes the picture in data into buf (at most buf_size bytes); returns the number of bytes written, or a non-zero error code from a failing stage (-1 when buf is too small) */
+{
+    int tileno, ret;
+    J2kEncoderContext *s = avctx->priv_data;
+
+    // init:
+    s->buf = s->buf_start = buf;
+    s->buf_end = buf + buf_size;
+
+    s->picture = data;
+
+    s->lambda = s->picture->quality * LAMBDA_SCALE; // rate/distortion trade-off later consumed by truncpasses()
+
+    copy_frame(s);
+    reinit(s);
+
+    if (s->buf_end - s->buf < 2)
+        return -1;
+    bytestream_put_be16(&s->buf, J2K_SOC);
+    if (ret = put_siz(s))
+        return ret;
+    if (ret = put_cod(s))
+        return ret;
+    if (ret = put_qcd(s, 0))
+        return ret;
+
+    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
+        uint8_t *psotptr;
+        if (!(psotptr = put_sot(s, tileno)) || psotptr == (uint8_t *)-1) // put_sot() is defined outside this view: treat both NULL and a (uint8_t *)-1 sentinel as failure
+            return -1;
+        if (s->buf_end - s->buf < 2)
+            return -1;
+        bytestream_put_be16(&s->buf, J2K_SOD);
+        if (ret = encode_tile(s, s->tile + tileno, tileno))
+            return ret;
+        bytestream_put_be32(&psotptr, s->buf - psotptr + 6); // back-patch the 32-bit field at psotptr - presumably the tile-part length (Psot); confirm against put_sot()
+    }
+    if (s->buf_end - s->buf < 2)
+        return -1;
+    bytestream_put_be16(&s->buf, J2K_EOC);
+
+    av_log(s->avctx, AV_LOG_DEBUG, "end\n");
+    return s->buf - s->buf_start;
+}
+
+static av_cold int j2kenc_init(AVCodecContext *avctx) /* fills the encoder context with fixed defaults, derives the component layout from avctx->pix_fmt and allocates the tiles; returns 0 or the init_tiles() error */
+{
+    int i, ret;
+    J2kEncoderContext *s = avctx->priv_data;
+    J2kCodingStyle *codsty = &s->codsty;
+    J2kQuantStyle  *qntsty = &s->qntsty;
+
+    s->avctx = avctx;
+    av_log(s->avctx, AV_LOG_DEBUG, "init\n");
+
+    // defaults:
+    // TODO: implement setting non-standard precinct size
+    codsty->log2_prec_width  = 15;
+    codsty->log2_prec_height = 15;
+    codsty->nreslevels       = 7;
+    codsty->log2_cblk_width  = 4;
+    codsty->log2_cblk_height = 4;
+    codsty->transform        = 1; // NOTE(review): raw enum value; which transform 1 names is declared outside this view
+
+    qntsty->nguardbits       = 1;
+
+    s->tile_width            = 256;
+    s->tile_height           = 256;
+
+    if (codsty->transform == FF_DWT53) // the 5/3 path uses no quantization; any other transform uses J2K_QSTY_SE
+        qntsty->quantsty = J2K_QSTY_NONE;
+    else
+        qntsty->quantsty = J2K_QSTY_SE;
+
+    s->width = avctx->width;
+    s->height = avctx->height;
+
+    for (i = 0; i < 3; i++) // 8 bits per sample for each of the (up to) three components
+        s->cbps[i] = 8;
+
+    if (avctx->pix_fmt == PIX_FMT_RGB24){
+        s->ncomponents = 3;
+    } else if (avctx->pix_fmt == PIX_FMT_GRAY8){
+        s->ncomponents = 1;
+    } else{ // planar YUV
+        s->planar = 1;
+        s->ncomponents = 3;
+        avcodec_get_chroma_sub_sample(avctx->pix_fmt,
+                s->chroma_shift, s->chroma_shift + 1); // stores the two chroma shifts in chroma_shift[0] and chroma_shift[1]
+    }
+
+    ff_j2k_init_tier1_luts();
+
+    init_luts();
+
+    init_quantization(s);
+    if (ret=init_tiles(s))
+        return ret;
+
+    av_log(s->avctx, AV_LOG_DEBUG, "after init\n");
+
+    return 0;
+}
+
+static int j2kenc_destroy(AVCodecContext *avctx) /* frees the tile data held in the encoder context; always returns 0 */
+{
+    // priv_data is the J2kEncoderContext that j2kenc_init() populated
+    cleanup(avctx->priv_data);
+
+    return 0;
+}
+
+AVCodec jpeg2000_encoder = { // leading initializers are positional: they follow the AVCodec field order declared in avcodec.h (not visible here)
+    "j2k",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_JPEG2000,
+    sizeof(J2kEncoderContext), // presumably the private context size behind avctx->priv_data - confirm against AVCodec
+    j2kenc_init,
+    encode_frame,
+    j2kenc_destroy,
+    NULL,
+    0,
+    .pix_fmts = // pixel formats listed for this encoder; the array ends with -1
+        (enum PixelFormat[]) {PIX_FMT_GRAY8, PIX_FMT_RGB24,
+                              PIX_FMT_YUV422P, PIX_FMT_YUV444P,
+                              PIX_FMT_YUV410P, PIX_FMT_YUV411P,
+                              -1}
+};
diff --git a/libavcodec/mqc.c b/libavcodec/mqc.c
new file mode 100644
index 0000000000..700b9574c1
--- /dev/null
+++ b/libavcodec/mqc.c
@@ -0,0 +1,108 @@
+/*
+ * MQ-coder encoder and decoder common functions
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * MQ-coder encoder and decoder common functions
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include "mqc.h"
+
+typedef struct { // one row of the state table that ff_mqc_init_contexts() expands into ff_mqc_qe/nlps/nmps
+        uint16_t qe;   // value copied into ff_mqc_qe; the coders subtract it from the interval register a
+        uint8_t  nmps; // row to move to after the more probable symbol was coded
+        uint8_t  nlps; // row to move to after the less probable symbol was coded
+        uint8_t  sw;   // 1: the more-probable-symbol bit flips on the nlps transition
+} MqcCxState;
+
+static const MqcCxState cx_states[47] = { // rows are {qe, nmps, nlps, sw}; storage class first, as C11 6.11.5 marks the other order obsolescent
+    {0x5601,  1,  1, 1},
+    {0x3401,  2,  6, 0},
+    {0x1801,  3,  9, 0},
+    {0x0AC1,  4, 12, 0},
+    {0x0521,  5, 29, 0},
+    {0x0221, 38, 33, 0},
+    {0x5601,  7,  6, 1},
+    {0x5401,  8, 14, 0},
+    {0x4801,  9, 14, 0},
+    {0x3801, 10, 14, 0},
+    {0x3001, 11, 17, 0},
+    {0x2401, 12, 18, 0},
+    {0x1C01, 13, 20, 0},
+    {0x1601, 29, 21, 0},
+    {0x5601, 15, 14, 1},
+    {0x5401, 16, 14, 0},
+    {0x5101, 17, 15, 0},
+    {0x4801, 18, 16, 0},
+    {0x3801, 19, 17, 0},
+    {0x3401, 20, 18, 0},
+    {0x3001, 21, 19, 0},
+    {0x2801, 22, 19, 0},
+    {0x2401, 23, 20, 0},
+    {0x2201, 24, 21, 0},
+    {0x1C01, 25, 22, 0},
+    {0x1801, 26, 23, 0},
+    {0x1601, 27, 24, 0},
+    {0x1401, 28, 25, 0},
+    {0x1201, 29, 26, 0},
+    {0x1101, 30, 27, 0},
+    {0x0AC1, 31, 28, 0},
+    {0x09C1, 32, 29, 0},
+    {0x08A1, 33, 30, 0},
+    {0x0521, 34, 31, 0},
+    {0x0441, 35, 32, 0},
+    {0x02A1, 36, 33, 0},
+    {0x0221, 37, 34, 0},
+    {0x0141, 38, 35, 0},
+    {0x0111, 39, 36, 0},
+    {0x0085, 40, 37, 0},
+    {0x0049, 41, 38, 0},
+    {0x0025, 42, 39, 0},
+    {0x0015, 43, 40, 0},
+    {0x0009, 44, 41, 0},
+    {0x0005, 45, 42, 0},
+    {0x0001, 45, 43, 0},
+    {0x5601, 46, 46, 0}
+};
+
+uint16_t ff_mqc_qe [2*47]; // indexed by 2*row + mps bit; filled by ff_mqc_init_contexts()
+uint8_t ff_mqc_nlps[2*47]; // next combined state after a less probable symbol
+uint8_t ff_mqc_nmps[2*47]; // next combined state after a more probable symbol
+
+void ff_mqc_init_contexts(MqcState *mqc) /* resets the contexts of mqc to their start states and (re)builds the global lookup tables from cx_states */
+{
+    int i;
+    memset(mqc->cx_states, 0, sizeof(mqc->cx_states)); // every context starts in row 0 with mps bit 0 ...
+    mqc->cx_states[MQC_CX_UNI] = 2 * 46; // ... except these three, which start in rows 46, 3 and 4
+    mqc->cx_states[MQC_CX_RL] = 2 * 3;
+    mqc->cx_states[0] = 2 * 4;
+
+    for (i = 0; i < 47; i++){ // the global tables are rewritten with the same values on every call
+        ff_mqc_qe[2*i  ] =
+        ff_mqc_qe[2*i+1] = cx_states[i].qe;
+
+        ff_mqc_nlps[2*i  ] = 2*cx_states[i].nlps + cx_states[i].sw; // sw flips the low (mps) bit on this transition
+        ff_mqc_nlps[2*i+1] = 2*cx_states[i].nlps + 1 - cx_states[i].sw;
+        ff_mqc_nmps[2*i  ] = 2*cx_states[i].nmps; // the mps bit is carried over unchanged
+        ff_mqc_nmps[2*i+1] = 2*cx_states[i].nmps + 1;
+    }
+}
diff --git a/libavcodec/mqc.h b/libavcodec/mqc.h
new file mode 100644
index 0000000000..b28c13ec48
--- /dev/null
+++ b/libavcodec/mqc.h
@@ -0,0 +1,75 @@
+/*
+ * MQ-coder
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MQC_H
+#define AVCODEC_MQC_H
+
+/**
+ * MQ-coder
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include "avcodec.h"
+
+#define MQC_CX_UNI 17
+#define MQC_CX_RL  18
+
+extern uint16_t  ff_mqc_qe[2*47];
+extern uint8_t ff_mqc_nlps[2*47];
+extern uint8_t ff_mqc_nmps[2*47];
+
+typedef struct { // state of one MQ coder instance; the encoder and the decoder use the same struct
+    uint8_t *bp, *bpstart; // bp: byte currently written/read; bpstart: start of the output buffer (set by the encoder only)
+    unsigned int a; // interval register, set to 0x8000 by both init functions
+    unsigned int c; // code register
+    unsigned int ct; // encoder: shifts left before the next byteout()
+    uint8_t cx_states[19]; // per-context state: 2 * table row + mps bit
+} MqcState;
+
+/* encoder */
+
+/** initialize the encoder */
+void ff_mqc_initenc(MqcState *mqc, uint8_t *bp);
+
+/** code bit d with context cx */
+void ff_mqc_encode(MqcState *mqc, uint8_t *cxstate, int d);
+
+/** number of encoded bytes */
+int ff_mqc_length(MqcState *mqc);
+
+/** flush the encoder [returns number of bytes encoded] */
+int ff_mqc_flush(MqcState *mqc);
+
+/* decoder */
+
+/** initialize the decoder */
+void ff_mqc_initdec(MqcState *mqc, uint8_t *bp);
+
+/** returns decoded bit with context cx */
+int ff_mqc_decode(MqcState *mqc, uint8_t *cxstate);
+
+/* common */
+
+/** initialize the contexts */
+void ff_mqc_init_contexts(MqcState *mqc);
+
+#endif /* AVCODEC_MQC_H */
diff --git a/libavcodec/mqcdec.c b/libavcodec/mqcdec.c
new file mode 100644
index 0000000000..56e22f88c7
--- /dev/null
+++ b/libavcodec/mqcdec.c
@@ -0,0 +1,93 @@
+/*
+ * MQ-coder decoder
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * MQ-coder decoder
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include "mqc.h"
+
+static void bytein(MqcState *mqc) /* folds the next input byte into the code register c, with special handling after a 0xff byte */
+{
+    if (*mqc->bp == 0xff){
+        if (*(mqc->bp+1) > 0x8f) // 0xff followed by a byte above 0x8f: do not advance, only bump c
+            mqc->c++;
+        else{
+            mqc->bp++;
+            mqc->c += 2 + 0xfe00 - (*mqc->bp << 9); // the byte after 0xff is shifted by 9 instead of 8
+        }
+    } else{
+        mqc->bp++;
+        mqc->c += 1 + 0xff00 - (*mqc->bp << 8);
+    }
+}
+
+static int exchange(MqcState *mqc, uint8_t *cxstate, int lps) /* decoder slow path: picks the decoded bit, advances *cxstate and renormalizes; lps says which sub-interval ff_mqc_decode() selected */
+{
+    int d;
+    if ((mqc->a < ff_mqc_qe[*cxstate]) ^ (!lps)){ // lps=0: taken when a >= qe; lps=1: taken when a < qe
+        if (lps)
+            mqc->a = ff_mqc_qe[*cxstate];
+        d = *cxstate & 1; // low bit of the state is the more probable symbol
+        *cxstate = ff_mqc_nmps[*cxstate];
+    } else{
+        if (lps)
+            mqc->a = ff_mqc_qe[*cxstate];
+        d = 1 - (*cxstate & 1);
+        *cxstate = ff_mqc_nlps[*cxstate];
+    }
+    // renormd:
+    do{
+        if (!(mqc->c & 0xff)){ // low byte of c used up: fetch the next input byte
+            mqc->c -= 0x100;
+            bytein(mqc);
+        }
+        mqc->a += mqc->a; // shift both registers left by one
+        mqc->c += mqc->c;
+    } while (!(mqc->a & 0x8000));
+    return d;
+}
+
+void ff_mqc_initdec(MqcState *mqc, uint8_t *bp) /* resets the contexts and primes the decoder registers from the bytes at bp */
+{
+    ff_mqc_init_contexts(mqc);
+    mqc->bp = bp;
+    mqc->c = (*mqc->bp ^ 0xff) << 16; // first byte enters c inverted
+    bytein(mqc);
+    mqc->c = mqc->c << 7;
+    mqc->a = 0x8000;
+}
+
+int ff_mqc_decode(MqcState *mqc, uint8_t *cxstate) /* decodes and returns one bit using the context state at cxstate, which is updated in place */
+{
+    mqc->a -= ff_mqc_qe[*cxstate];
+    if ((mqc->c >> 16) < mqc->a){
+        if (mqc->a & 0x8000) // fast path: no renormalization needed, the bit is the more probable symbol
+            return *cxstate & 1;
+        else
+            return exchange(mqc, cxstate, 0);
+    } else {
+        mqc->c -= mqc->a << 16;
+        return exchange(mqc, cxstate, 1);
+    }
+}
diff --git a/libavcodec/mqcenc.c b/libavcodec/mqcenc.c
new file mode 100644
index 0000000000..97d352be44
--- /dev/null
+++ b/libavcodec/mqcenc.c
@@ -0,0 +1,119 @@
+/*
+ * MQ-coder encoder
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * MQ-coder encoder
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include "mqc.h"
+
+static void byteout(MqcState *mqc) /* moves the next finished byte from the code register c to the output and reloads the shift counter ct */
+{
+retry:
+    if (*mqc->bp == 0xff){ // after a 0xff byte only 7 bits are emitted (c >> 20, ct = 7)
+        mqc->bp++;
+        *mqc->bp = mqc->c >> 20;
+        mqc->c &= 0xfffff;
+        mqc->ct = 7;
+    } else if ((mqc->c & 0x8000000)){ // carry out of c: add it to the previous byte, then re-check that byte for 0xff
+        (*mqc->bp)++;
+        mqc->c &= 0x7ffffff;
+        goto retry;
+    } else{
+        mqc->bp++;
+        *mqc->bp = mqc->c >> 19;
+        mqc->c &= 0x7ffff;
+        mqc->ct = 8;
+    }
+}
+
+static void renorme(MqcState *mqc) /* encoder renormalization: shifts a and c left until bit 15 of a is set, emitting a byte whenever ct reaches zero */
+{
+    do{
+        mqc->a <<= 1;
+        mqc->c <<= 1;
+        if (--mqc->ct == 0)
+            byteout(mqc);
+    } while ((mqc->a & 0x8000) == 0);
+}
+
+static void setbits(MqcState *mqc) /* flush helper: sets the low 16 bits of c, then subtracts 0x8000 if c reached the old c + a */
+{
+    unsigned int upper = mqc->c + mqc->a; // sum taken before c is modified
+    mqc->c |= 0xffff;
+    if (mqc->c >= upper)
+        mqc->c -= 0x8000;
+}
+
+void ff_mqc_initenc(MqcState *mqc, uint8_t *bp) /* resets the contexts and registers; coded bytes will be written starting at bp */
+{
+    ff_mqc_init_contexts(mqc);
+    mqc->a = 0x8000;
+    mqc->c = 0;
+    mqc->bp = bp-1; // byteout() pre-increments bp, so the first emitted byte lands at bp[0]
+    mqc->bpstart = bp;
+    mqc->ct = 12 + (*mqc->bp == 0xff); // NOTE(review): reads bp[-1]; the caller must make the byte before bp readable (byteout() may also add a carry to it)
+}
+
+void ff_mqc_encode(MqcState *mqc, uint8_t *cxstate, int d) /* codes bit d (0 or 1) with the context state at cxstate, which is updated in place */
+{
+    int qe;
+
+    qe = ff_mqc_qe[*cxstate];
+    mqc->a -= qe;
+    if ((*cxstate & 1) == d){ // d is the more probable symbol of this context
+        if (!(mqc->a & 0x8000)){ // a dropped below 0x8000: state change and renormalization needed
+            if (mqc->a < qe)
+                mqc->a = qe;
+            else
+                mqc->c += qe;
+            *cxstate = ff_mqc_nmps[*cxstate];
+            renorme(mqc);
+        } else
+            mqc->c += qe; // fast path: the context state stays as it is
+    } else{ // d is the less probable symbol: always changes state and renormalizes
+        if (mqc->a < qe)
+            mqc->c += qe;
+        else
+            mqc->a = qe;
+        *cxstate = ff_mqc_nlps[*cxstate];
+        renorme(mqc);
+    }
+}
+
+int ff_mqc_length(MqcState *mqc) /* distance in bytes from the buffer start to the byte currently being written */
+{
+    return (int)(mqc->bp - mqc->bpstart);
+}
+
+int ff_mqc_flush(MqcState *mqc) /* terminates the codeword: writes out the bits still held in c and returns the total number of bytes produced */
+{
+    setbits(mqc);
+    mqc->c = mqc->c << mqc->ct; // align the pending bits, then emit two more bytes
+    byteout(mqc);
+    mqc->c = mqc->c << mqc->ct;
+    byteout(mqc);
+    if (*mqc->bp != 0xff) // a trailing 0xff byte is left out of the returned length
+        mqc->bp++;
+    return mqc->bp - mqc->bpstart;
+}
diff --git a/libavformat/img2.c b/libavformat/img2.c
index 58b0780a3a..e0d9293aea 100644
--- a/libavformat/img2.c
+++ b/libavformat/img2.c
@@ -77,6 +77,7 @@ static const IdStrMap img_tags[] = {
     { CODEC_ID_SUNRAST   , "sunras"},
     { CODEC_ID_JPEG2000  , "j2k"},
     { CODEC_ID_JPEG2000  , "jp2"},
+    { CODEC_ID_JPEG2000  , "jpc"},
     { CODEC_ID_DPX       , "dpx"},
     { CODEC_ID_PICTOR    , "pic"},
     { CODEC_ID_NONE      , NULL}
diff --git a/libavformat/riff.c b/libavformat/riff.c
index 833a6bf9e8..4b3ab60bb7 100644
--- a/libavformat/riff.c
+++ b/libavformat/riff.c
@@ -250,6 +250,7 @@ const AVCodecTag ff_codec_bmp_tags[] = {
     { CODEC_ID_ZMBV,         MKTAG('Z', 'M', 'B', 'V') },
     { CODEC_ID_KMVC,         MKTAG('K', 'M', 'V', 'C') },
     { CODEC_ID_CAVS,         MKTAG('C', 'A', 'V', 'S') },
+    { CODEC_ID_JPEG2000,     MKTAG('m', 'j', 'p', '2') },
     { CODEC_ID_JPEG2000,     MKTAG('M', 'J', '2', 'C') },
     { CODEC_ID_VMNC,         MKTAG('V', 'M', 'n', 'c') },
     { CODEC_ID_TARGA,        MKTAG('t', 'g', 'a', ' ') },

From cbe60f34515dcfcbd08c00685d5810857ba9513e Mon Sep 17 00:00:00 2001
From: Rukhsana Ruby <rukhsana.afroz@gmail.com>
Date: Mon, 23 May 2011 23:55:28 +0200
Subject: [PATCH 219/830] j2k: Add void as the parameter of function
 ff_j2k_init_tier1_luts

---
 libavcodec/j2k.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/j2k.c b/libavcodec/j2k.c
index 3cf87bc9fa..a58167b1b8 100644
--- a/libavcodec/j2k.c
+++ b/libavcodec/j2k.c
@@ -166,7 +166,7 @@ static int getsgnctxno(int flag, uint8_t *xorbit)
     return ctxlbltab[hcontrib][vcontrib];
 }
 
-void ff_j2k_init_tier1_luts()
+void ff_j2k_init_tier1_luts(void)
 {
     int i, j;
     for (i = 0; i < 256; i++)

From 88e9397ef1e8c854e00b329cc866f88d046893d9 Mon Sep 17 00:00:00 2001
From: Rukhsana Ruby <rukhsana.afroz@gmail.com>
Date: Mon, 23 May 2011 23:56:20 +0200
Subject: [PATCH 220/830] j2k[c/h] j2kdec.c: Implement 2 code block styles

---
 libavcodec/j2k.c    | 16 +++++++++-------
 libavcodec/j2k.h    |  2 +-
 libavcodec/j2kdec.c | 30 +++++++++++++++++++++++-------
 3 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/libavcodec/j2k.c b/libavcodec/j2k.c
index a58167b1b8..33a7e3100b 100644
--- a/libavcodec/j2k.c
+++ b/libavcodec/j2k.c
@@ -105,18 +105,20 @@ static void tag_tree_zero(J2kTgtNode *t, int w, int h)
 
 uint8_t ff_j2k_nbctxno_lut[256][4];
 
-static int getnbctxno(int flag, int bandno)
+static int getnbctxno(int flag, int bandno, int vert_causal_ctx_csty_symbol)
 {
     int h, v, d;
 
     h = ((flag & J2K_T1_SIG_E) ? 1:0)+
         ((flag & J2K_T1_SIG_W) ? 1:0);
-    v = ((flag & J2K_T1_SIG_N) ? 1:0)+
-        ((flag & J2K_T1_SIG_S) ? 1:0);
+    v = ((flag & J2K_T1_SIG_N) ? 1:0);
+    if (!vert_causal_ctx_csty_symbol)
+         v = v + ((flag & J2K_T1_SIG_S) ? 1:0);
     d = ((flag & J2K_T1_SIG_NE) ? 1:0)+
-        ((flag & J2K_T1_SIG_NW) ? 1:0)+
-        ((flag & J2K_T1_SIG_SE) ? 1:0)+
-        ((flag & J2K_T1_SIG_SW) ? 1:0);
+        ((flag & J2K_T1_SIG_NW) ? 1:0);
+    if (!vert_causal_ctx_csty_symbol)
+        d = d + ((flag & J2K_T1_SIG_SE) ? 1:0)+
+                ((flag & J2K_T1_SIG_SW) ? 1:0);
     if (bandno < 3){
             if (bandno == 1)
                 FFSWAP(int, h, v);
@@ -171,7 +173,7 @@ void ff_j2k_init_tier1_luts(void)
     int i, j;
     for (i = 0; i < 256; i++)
         for (j = 0; j < 4; j++)
-            ff_j2k_nbctxno_lut[i][j] = getnbctxno(i, j);
+            ff_j2k_nbctxno_lut[i][j] = getnbctxno(i, j, 0);
     for (i = 0; i < 16; i++)
         for (j = 0; j < 16; j++)
             ff_j2k_sgnctxno_lut[i][j] = getsgnctxno(i + (j << 8), &ff_j2k_xorbit_lut[i][j]);
diff --git a/libavcodec/j2k.h b/libavcodec/j2k.h
index 3a41b5381c..85d5cd079c 100644
--- a/libavcodec/j2k.h
+++ b/libavcodec/j2k.h
@@ -208,7 +208,7 @@ void ff_j2k_set_significant(J2kT1Context *t1, int x, int y, int negative);
 
 extern uint8_t ff_j2k_nbctxno_lut[256][4];
 
-static inline int ff_j2k_getnbctxno(int flag, int bandno)
+static inline int ff_j2k_getnbctxno(int flag, int bandno, int vert_causal_ctx_csty_symbol)
 {
     return ff_j2k_nbctxno_lut[flag&255][bandno];
 }
diff --git a/libavcodec/j2kdec.c b/libavcodec/j2kdec.c
index 112c0ffd3e..f2ece8159d 100644
--- a/libavcodec/j2kdec.c
+++ b/libavcodec/j2kdec.c
@@ -592,7 +592,8 @@ static int decode_packets(J2kDecoderContext *s, J2kTile *tile)
 }
 
 /* TIER-1 routines */
-static void decode_sigpass(J2kT1Context *t1, int width, int height, int bpno, int bandno)
+static void decode_sigpass(J2kT1Context *t1, int width, int height, int bpno, int bandno, int bpass_csty_symbol,
+                           int vert_causal_ctx_csty_symbol)
 {
     int mask = 3 << (bpno - 1), y0, x, y;
 
@@ -601,10 +602,15 @@ static void decode_sigpass(J2kT1Context *t1, int width, int height, int bpno, in
             for (y = y0; y < height && y < y0+4; y++){
                 if ((t1->flags[y+1][x+1] & J2K_T1_SIG_NB)
                 && !(t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS))){
-                    if (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno))){
+                    int vert_causal_ctx_csty_loc_symbol = vert_causal_ctx_csty_symbol && (x == 3 && y == 3);
+                    if (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno,
+                                      vert_causal_ctx_csty_loc_symbol))){
                         int xorbit, ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
-
-                        t1->data[y][x] = (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) ? -mask : mask;
+                        if (bpass_csty_symbol)
+                             t1->data[y][x] = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ? -mask : mask;
+                        else
+                             t1->data[y][x] = (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) ?
+                                               -mask : mask;
 
                         ff_j2k_set_significant(t1, x, y, t1->data[y][x] < 0);
                     }
@@ -658,7 +664,8 @@ static void decode_clnpass(J2kDecoderContext *s, J2kT1Context *t1, int width, in
             for (y = y0 + runlen; y < y0 + 4 && y < height; y++){
                 if (!dec){
                     if (!(t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS)))
-                        dec = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno));
+                        dec = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_j2k_getnbctxno(t1->flags[y+1][x+1],
+                                                                                             bandno, 0));
                 }
                 if (dec){
                     int xorbit, ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
@@ -685,7 +692,7 @@ static void decode_clnpass(J2kDecoderContext *s, J2kT1Context *t1, int width, in
 static int decode_cblk(J2kDecoderContext *s, J2kCodingStyle *codsty, J2kT1Context *t1, J2kCblk *cblk,
                        int width, int height, int bandpos)
 {
-    int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1, y;
+    int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1, y, clnpass_cnt = 0;
 
     for (y = 0; y < height+2; y++)
         memset(t1->flags[y], 0, (width+2)*sizeof(int));
@@ -697,14 +704,23 @@ static int decode_cblk(J2kDecoderContext *s, J2kCodingStyle *codsty, J2kT1Contex
     cblk->data[cblk->length] = 0xff;
     cblk->data[cblk->length+1] = 0xff;
 
+    int bpass_csty_symbol = J2K_CBLK_BYPASS & codsty->cblk_style;
+    int vert_causal_ctx_csty_symbol = J2K_CBLK_VSC & codsty->cblk_style;
+
     while(passno--){
         switch(pass_t){
-            case 0: decode_sigpass(t1, width, height, bpno+1, bandpos);
+            case 0: decode_sigpass(t1, width, height, bpno+1, bandpos,
+                                  bpass_csty_symbol && (clnpass_cnt >= 4), vert_causal_ctx_csty_symbol);
                     break;
             case 1: decode_refpass(t1, width, height, bpno+1);
+                    if (bpass_csty_symbol && clnpass_cnt >= 4)
+                        ff_mqc_initdec(&t1->mqc, cblk->data);
                     break;
             case 2: decode_clnpass(s, t1, width, height, bpno+1, bandpos,
                                    codsty->cblk_style & J2K_CBLK_SEGSYM);
+                    clnpass_cnt = clnpass_cnt + 1;
+                    if (bpass_csty_symbol && clnpass_cnt >= 4)
+                       ff_mqc_initdec(&t1->mqc, cblk->data);
                     break;
         }
 

From df0adc806b77a6e0cb9293a5002ed00788436f68 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 24 May 2011 00:01:29 +0200
Subject: [PATCH 221/830] j2kdec: mark as CODEC_CAP_EXPERIMENTAL

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/j2kdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/j2kdec.c b/libavcodec/j2kdec.c
index f2ece8159d..3156e90198 100644
--- a/libavcodec/j2kdec.c
+++ b/libavcodec/j2kdec.c
@@ -1063,7 +1063,7 @@ AVCodec ff_jpeg2000_decoder = {
     NULL,
     decode_end,
     decode_frame,
-    0,
+    .capabilities = CODEC_CAP_EXPERIMENTAL,
     .pix_fmts =
         (enum PixelFormat[]) {PIX_FMT_GRAY8, PIX_FMT_RGB24, -1}
 };

From 1bc81bf981552ec542e14af194cd05d7c74a83e1 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 24 May 2011 01:31:15 +0200
Subject: [PATCH 222/830] avcodec_find_decoder: prefer non experimental
 decoders.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/utils.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index aecfb8af33..9dcc4a8105 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -903,14 +903,18 @@ AVCodec *avcodec_find_encoder_by_name(const char *name)
 
 AVCodec *avcodec_find_decoder(enum CodecID id)
 {
-    AVCodec *p;
+    AVCodec *p, *experimental=NULL;
     p = first_avcodec;
     while (p) {
-        if (p->decode != NULL && p->id == id)
-            return p;
+        if (p->decode != NULL && p->id == id) {
+            if (!(p->capabilities & CODEC_CAP_EXPERIMENTAL))
+                return p;             /* a non-experimental decoder always wins */
+            if (!experimental)
+                experimental = p;     /* fallback: first experimental match */
+        }
         p = p->next;
     }
-    return NULL;
+    return experimental;
 }
 
 AVCodec *avcodec_find_decoder_by_name(const char *name)

From 0781e14ec440463529c5cae25f7335a7224d7f26 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 24 May 2011 04:57:51 +0200
Subject: [PATCH 223/830] fate: reenable frext-pph10i4_panasonic_a after the
 bitstream has been fixed

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 tests/fate/h264.mak | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/fate/h264.mak b/tests/fate/h264.mak
index 0f18be4888..5e6ff50e84 100644
--- a/tests/fate/h264.mak
+++ b/tests/fate/h264.mak
@@ -130,6 +130,7 @@ FATE_H264 = aud_mw_e                                                    \
             frext-pph10i1_panasonic_a                                   \
             frext-pph10i2_panasonic_a                                   \
             frext-pph10i3_panasonic_a                                   \
+            frext-pph10i4_panasonic_a                                   \
             frext-pph10i5_panasonic_a                                   \
             frext-pph10i6_panasonic_a                                   \
             frext-pph10i7_panasonic_a                                   \
@@ -310,6 +311,7 @@ fate-h264-conformance-frext-hpcvnl_brcm_a: CMD = framecrc  -i $(SAMPLES)/h264-co
 fate-h264-conformance-frext-pph10i1_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I1_Panasonic_A.264 -pix_fmt yuv420p10le
 fate-h264-conformance-frext-pph10i2_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I2_Panasonic_A.264 -pix_fmt yuv420p10le
 fate-h264-conformance-frext-pph10i3_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I3_Panasonic_A.264 -pix_fmt yuv420p10le
+fate-h264-conformance-frext-pph10i4_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I4_Panasonic_A.264 -pix_fmt yuv420p10le
 fate-h264-conformance-frext-pph10i5_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I5_Panasonic_A.264 -pix_fmt yuv420p10le
 fate-h264-conformance-frext-pph10i6_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I6_Panasonic_A.264 -pix_fmt yuv420p10le
 fate-h264-conformance-frext-pph10i7_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I7_Panasonic_A.264 -pix_fmt yuv420p10le

From af2f79709947e5135e2923138a15ba83daeb75c5 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 24 May 2011 05:19:56 +0200
Subject: [PATCH 224/830] partial revert of
 01d3ebaf219d83c0a70cdf9696ecb6b868e8a165

Fixes ffplay

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/utils.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 9dcc4a8105..5354459866 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -746,6 +746,8 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
 
             if(!avctx->has_b_frames){
             picture->pkt_pos= avpkt->pos;
+            }
+            //FIXME these should be under if(!avctx->has_b_frames)
             if (!picture->sample_aspect_ratio.num)
                 picture->sample_aspect_ratio = avctx->sample_aspect_ratio;
             if (!picture->width)
@@ -754,7 +756,6 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
                 picture->height = avctx->height;
             if (picture->format == PIX_FMT_NONE)
                 picture->format = avctx->pix_fmt;
-            }
         }
 
         emms_c(); //needed to avoid an emms_c() call before every return;

From 7f3a7b5c40b7fda09cbba4bb53e1ced133970930 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Mon, 23 May 2011 17:45:51 +0200
Subject: [PATCH 225/830] ac3enc: add channel coupling support

Channel coupling is an optional AC-3 feature that increases quality by
combining high frequency information from multiple channels into a
single channel. The per-channel high frequency information is sent with
less accuracy in both the frequency and time domains. This allows more
bits to be used for lower frequencies while preserving enough
information to reconstruct the high frequencies.
---
 doc/encoders.texi         |  43 ++
 libavcodec/ac3.h          |   3 +-
 libavcodec/ac3dec.h       |   5 -
 libavcodec/ac3dec_data.c  |   6 -
 libavcodec/ac3dec_data.h  |   1 -
 libavcodec/ac3enc.c       | 894 +++++++++++++++++++++++++++++++-------
 libavcodec/ac3enc_fixed.c |   2 +-
 libavcodec/ac3enc_float.c |   6 +-
 libavcodec/ac3tab.c       |   7 +
 libavcodec/ac3tab.h       |   1 +
 10 files changed, 788 insertions(+), 180 deletions(-)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 2f3cecde66..760ec4bad9 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -365,4 +365,47 @@ is highly recommended that it be left as enabled except for testing purposes.
 
 @end table
 
+@subheading Floating-Point-Only AC-3 Encoding Options
+
+These options are only valid for the floating-point encoder and do not exist
+for the fixed-point encoder due to the corresponding features not being
+implemented in fixed-point.
+
+@table @option
+
+@item -channel_coupling @var{boolean}
+Enables/Disables use of channel coupling, which is an optional AC-3 feature
+that increases quality by combining high frequency information from multiple
+channels into a single channel. The per-channel high frequency information is
+sent with less accuracy in both the frequency and time domains. This allows
+more bits to be used for lower frequencies while preserving enough information
+to reconstruct the high frequencies. This option is enabled by default for the
+floating-point encoder and should generally be left as enabled except for
+testing purposes or to increase encoding speed.
+@table @option
+@item -1
+@itemx auto
+Selected by Encoder
+@item 0
+@itemx off
+Disable Channel Coupling
+@item 1
+@itemx on
+Enable Channel Coupling (default)
+@end table
+
+@item -cpl_start_band @var{number}
+Coupling Start Band. Sets the channel coupling start band, from 1 to 15. If a
+value higher than the bandwidth is used, it will be reduced to 1 less than the
+coupling end band. If @var{auto} is used, the start band will be determined by
+the encoder based on the bit rate, sample rate, and channel layout. This option
+has no effect if channel coupling is disabled.
+@table @option
+@item -1
+@itemx auto
+Selected by Encoder (default)
+@end table
+
+@end table
+
 @c man end ENCODERS
diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h
index 4ed8c2523b..c06f3d542d 100644
--- a/libavcodec/ac3.h
+++ b/libavcodec/ac3.h
@@ -28,7 +28,8 @@
 #define AVCODEC_AC3_H
 
 #define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
-#define AC3_MAX_CHANNELS 6 /* including LFE channel */
+#define AC3_MAX_CHANNELS 7            /**< maximum number of channels, including coupling channel */
+#define CPL_CH 0                      /**< coupling channel index */
 
 #define AC3_MAX_COEFS   256
 #define AC3_BLOCK_SIZE  256
diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h
index 6cba95b495..f0ab75ae98 100644
--- a/libavcodec/ac3dec.h
+++ b/libavcodec/ac3dec.h
@@ -58,11 +58,6 @@
 #include "fft.h"
 #include "fmtconvert.h"
 
-/* override ac3.h to include coupling channel */
-#undef AC3_MAX_CHANNELS
-#define AC3_MAX_CHANNELS 7
-#define CPL_CH 0
-
 #define AC3_OUTPUT_LFEON  8
 
 #define SPX_MAX_BANDS    17
diff --git a/libavcodec/ac3dec_data.c b/libavcodec/ac3dec_data.c
index ba3cbd30ef..272a963f08 100644
--- a/libavcodec/ac3dec_data.c
+++ b/libavcodec/ac3dec_data.c
@@ -53,12 +53,6 @@ const uint8_t ff_eac3_hebap_tab[64] = {
     19, 19, 19, 19,
 };
 
-/**
- * Table E2.16 Default Coupling Banding Structure
- */
-const uint8_t ff_eac3_default_cpl_band_struct[18] =
-{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1 };
-
 /**
  * Table E2.15 Default Spectral Extension Banding Structure
  */
diff --git a/libavcodec/ac3dec_data.h b/libavcodec/ac3dec_data.h
index a758f8b3d3..c0a584e7b3 100644
--- a/libavcodec/ac3dec_data.h
+++ b/libavcodec/ac3dec_data.h
@@ -27,7 +27,6 @@
 extern const uint8_t ff_ac3_ungroup_3_in_5_bits_tab[32][3];
 
 extern const uint8_t ff_eac3_hebap_tab[64];
-extern const uint8_t ff_eac3_default_cpl_band_struct[18];
 extern const uint8_t ff_eac3_default_spx_band_struct[17];
 
 #endif /* AVCODEC_AC3DEC_DATA_H */
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 95bdc58f16..5014fdb753 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -99,6 +99,8 @@ typedef struct AC3EncOptions {
     /* other encoding options */
     int allow_per_frame_metadata;
     int stereo_rematrixing;
+    int channel_coupling;
+    int cpl_start;
 } AC3EncOptions;
 
 /**
@@ -114,10 +116,22 @@ typedef struct AC3Block {
     int16_t  **band_psd;                        ///< psd per critical band
     int16_t  **mask;                            ///< masking curve
     uint16_t **qmant;                           ///< quantized mantissas
+    uint8_t  **cpl_coord_exp;                   ///< coupling coord exponents           (cplcoexp)
+    uint8_t  **cpl_coord_mant;                  ///< coupling coord mantissas           (cplcomant)
     uint8_t  coeff_shift[AC3_MAX_CHANNELS];     ///< fixed-point coefficient shift values
     uint8_t  new_rematrixing_strategy;          ///< send new rematrixing flags in this block
+    int      num_rematrixing_bands;             ///< number of rematrixing bands
     uint8_t  rematrixing_flags[4];              ///< rematrixing flags
     struct AC3Block *exp_ref_block[AC3_MAX_CHANNELS]; ///< reference blocks for EXP_REUSE
+    int      new_cpl_strategy;                  ///< send new coupling strategy
+    int      cpl_in_use;                        ///< coupling in use for this block     (cplinu)
+    uint8_t  channel_in_cpl[AC3_MAX_CHANNELS];  ///< channel in coupling                (chincpl)
+    int      num_cpl_channels;                  ///< number of channels in coupling
+    uint8_t  new_cpl_coords;                    ///< send new coupling coordinates      (cplcoe)
+    uint8_t  cpl_master_exp[AC3_MAX_CHANNELS];  ///< coupling coord master exponents    (mstrcplco)
+    int      new_snr_offsets;                   ///< send new SNR offsets
+    int      new_cpl_leak;                      ///< send new coupling leak info
+    int      end_freq[AC3_MAX_CHANNELS];        ///< end frequency bin                  (endmant)
 } AC3Block;
 
 /**
@@ -164,10 +178,16 @@ typedef struct AC3EncodeContext {
 
     int cutoff;                             ///< user-specified cutoff frequency, in Hz
     int bandwidth_code;                     ///< bandwidth code (0 to 60)               (chbwcod)
-    int nb_coefs[AC3_MAX_CHANNELS];
+    int start_freq[AC3_MAX_CHANNELS];       ///< start frequency bin                    (strtmant)
+    int cpl_end_freq;                       ///< coupling channel end frequency bin
+
+    int cpl_on;                             ///< coupling turned on for this frame
+    int cpl_enabled;                        ///< coupling enabled for all frames
+    int num_cpl_subbands;                   ///< number of coupling subbands            (ncplsubnd)
+    int num_cpl_bands;                      ///< number of coupling bands               (ncplbnd)
+    uint8_t cpl_band_sizes[AC3_MAX_CPL_BANDS];  ///< number of coeffs in each coupling band
 
     int rematrixing_enabled;                ///< stereo rematrixing enabled
-    int num_rematrixing_bands;              ///< number of rematrixing bands
 
     /* bitrate allocation control */
     int slow_gain_code;                     ///< slow gain code                         (sgaincod)
@@ -194,6 +214,8 @@ typedef struct AC3EncodeContext {
     int16_t *band_psd_buffer;
     int16_t *mask_buffer;
     uint16_t *qmant_buffer;
+    uint8_t *cpl_coord_exp_buffer;
+    uint8_t *cpl_coord_mant_buffer;
 
     uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies
 
@@ -267,6 +289,12 @@ static const AVOption options[] = {
     {"hdcd",     "HDCD",               0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
 /* Other Encoding Options */
 {"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
+#if CONFIG_AC3ENC_FLOAT
+{"channel_coupling",   "Channel Coupling",   OFFSET(channel_coupling),   FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM, "channel_coupling"},
+    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "channel_coupling"},
+{"cpl_start_band", "Coupling Start Band", OFFSET(cpl_start), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 15, AC3ENC_PARAM, "cpl_start_band"},
+    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "cpl_start_band"},
+#endif
 {NULL}
 };
 
@@ -296,9 +324,9 @@ static void scale_coefficients(AC3EncodeContext *s);
 
 /**
  * LUT for number of exponent groups.
- * exponent_group_tab[exponent strategy-1][number of coefficients]
+ * exponent_group_tab[coupling][exponent strategy-1][number of coefficients]
  */
-static uint8_t exponent_group_tab[3][256];
+static uint8_t exponent_group_tab[2][3][256];
 
 
 /**
@@ -357,6 +385,49 @@ static const uint8_t ac3_bandwidth_tab[5][3][19] = {
 };
 
 
+/**
+ * LUT to select the coupling start band based on the bit rate, sample rate, and
+ * number of full-bandwidth channels. -1 = coupling off
+ * ac3_coupling_start_tab[channel_mode-2][sample rate code][bit rate code]
+ *
+ * TODO: more testing for optimal parameters.
+ *       multi-channel tests at 44.1kHz and 32kHz.
+ */
+static const int8_t ac3_coupling_start_tab[6][3][19] = {
+//      32  40  48  56  64  80  96 112 128 160 192 224 256 320 384 448 512 576 640
+
+    // 2/0
+    { {  0,  0,  0,  0,  0,  0,  0,  1,  1,  7,  8, 11, 12, -1, -1, -1, -1, -1, -1 },
+      {  0,  0,  0,  0,  0,  0,  1,  3,  5,  7, 10, 12, 13, -1, -1, -1, -1, -1, -1 },
+      {  0,  0,  0,  0,  1,  2,  2,  9, 13, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
+
+    // 3/0
+    { {  0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  6,  9, 11, 12, 13, -1, -1, -1, -1 },
+      {  0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  6,  9, 11, 12, 13, -1, -1, -1, -1 },
+      { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
+
+    // 2/1 - untested
+    { {  0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  6,  9, 11, 12, 13, -1, -1, -1, -1 },
+      {  0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  6,  9, 11, 12, 13, -1, -1, -1, -1 },
+      { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
+
+    // 3/1
+    { {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  2, 10, 11, 11, 12, 12, 14, -1 },
+      {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  2, 10, 11, 11, 12, 12, 14, -1 },
+      { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
+
+    // 2/2 - untested
+    { {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  2, 10, 11, 11, 12, 12, 14, -1 },
+      {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  2, 10, 11, 11, 12, 12, 14, -1 },
+      { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
+
+    // 3/2
+    { {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  6,  8, 11, 12, 12, -1, -1 },
+      {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  6,  8, 11, 12, 12, -1, -1 },
+      { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
+};
+
+
 /**
  * Adjust the frame size to make the average bit rate match the target bit rate.
  * This is only needed for 11025, 22050, and 44100 sample rates.
@@ -419,15 +490,297 @@ static void apply_mdct(AC3EncodeContext *s)
 
             apply_window(&s->dsp, s->windowed_samples, input_samples, s->mdct.window, AC3_WINDOW_SIZE);
 
-            block->coeff_shift[ch] = normalize_samples(s);
+            block->coeff_shift[ch+1] = normalize_samples(s);
 
-            s->mdct.fft.mdct_calcw(&s->mdct.fft, block->mdct_coef[ch],
+            s->mdct.fft.mdct_calcw(&s->mdct.fft, block->mdct_coef[ch+1],
                                    s->windowed_samples);
         }
     }
 }
 
 
+static void compute_coupling_strategy(AC3EncodeContext *s)
+{
+    int blk, ch;
+    int got_cpl_snr;
+
+    /* set coupling use flags for each block/channel: every full-bandwidth channel follows s->cpl_on */
+    /* TODO: turn coupling on/off and adjust start band based on bit usage */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        for (ch = 1; ch <= s->fbw_channels; ch++)
+            block->channel_in_cpl[ch] = s->cpl_on;
+    }
+
+    /* enable coupling for each block if at least 2 channels have coupling
+       enabled for that block; otherwise clear all per-channel coupling flags */
+    got_cpl_snr = 0;
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        block->num_cpl_channels = 0;
+        for (ch = 1; ch <= s->fbw_channels; ch++)
+            block->num_cpl_channels += block->channel_in_cpl[ch];
+        block->cpl_in_use = block->num_cpl_channels > 1;
+        if (!block->cpl_in_use) {
+            block->num_cpl_channels = 0;
+            for (ch = 1; ch <= s->fbw_channels; ch++)
+                block->channel_in_cpl[ch] = 0;
+        }
+
+        block->new_cpl_strategy = !blk; /* always flagged for block 0 */
+        if (blk) {
+            for (ch = 1; ch <= s->fbw_channels; ch++) {
+                if (block->channel_in_cpl[ch] != s->blocks[blk-1].channel_in_cpl[ch]) {
+                    block->new_cpl_strategy = 1;
+                    break;
+                }
+            }
+        }
+        block->new_cpl_leak = block->new_cpl_strategy;
+
+        if (!blk || (block->cpl_in_use && !got_cpl_snr)) { /* block 0, or first block that uses coupling */
+            block->new_snr_offsets = 1;
+            if (block->cpl_in_use)
+                got_cpl_snr = 1;
+        } else {
+            block->new_snr_offsets = 0;
+        }
+    }
+
+    /* set bandwidth for each channel: coupled channels end where the coupling channel starts */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        for (ch = 1; ch <= s->fbw_channels; ch++) {
+            if (block->channel_in_cpl[ch])
+                block->end_freq[ch] = s->start_freq[CPL_CH];
+            else
+                block->end_freq[ch] = s->bandwidth_code * 3 + 73;
+        }
+    }
+}
+
+
+/**
+ * Calculate a single coupling coordinate: 0.125 * sqrtf(energy_ch / energy_cpl), or 0.125 if energy_cpl is not > 0.
+ */
+static inline float calc_cpl_coord(float energy_ch, float energy_cpl)
+{
+    float coord = 0.125;
+    if (energy_cpl > 0)
+        coord *= sqrtf(energy_ch / energy_cpl);
+    return coord;
+}
+
+
+/**
+ * Calculate coupling channel and coupling coordinates.
+ * TODO: Currently this is only used for the floating-point encoder. I was
+ *       able to make it work for the fixed-point encoder, but quality was
+ *       generally lower in most cases than not using coupling. If a more
+ *       adaptive coupling strategy were to be implemented it might be useful
+ *       at that time to use coupling for the fixed-point encoder as well.
+ */
+static void apply_channel_coupling(AC3EncodeContext *s)
+{
+#if CONFIG_AC3ENC_FLOAT
+    DECLARE_ALIGNED(16, float,   cpl_coords)      [AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16];
+    DECLARE_ALIGNED(16, int32_t, fixed_cpl_coords)[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16];
+    int blk, ch, bnd, i, j;
+    CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
+    int num_cpl_coefs = s->num_cpl_subbands * 12;
+
+    /* calculate coupling channel from fbw channels */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        CoefType *cpl_coef = &block->mdct_coef[CPL_CH][s->start_freq[CPL_CH]];
+        if (!block->cpl_in_use)
+            continue;
+        memset(cpl_coef-1, 0, (num_cpl_coefs+4) * sizeof(*cpl_coef));
+        for (ch = 1; ch <= s->fbw_channels; ch++) {
+            CoefType *ch_coef = &block->mdct_coef[ch][s->start_freq[CPL_CH]];
+            if (!block->channel_in_cpl[ch])
+                continue;
+            for (i = 0; i < num_cpl_coefs; i++)
+                cpl_coef[i] += ch_coef[i];
+        }
+        /* note: coupling start bin % 4 will always be 1 and num_cpl_coefs
+                 will always be a multiple of 12, so we need to subtract 1 from
+                 the start and add 4 to the length when using optimized
+                 functions which require 16-byte alignment. */
+
+        /* coefficients must be clipped to +/- 1.0 in order to be encoded */
+        s->dsp.vector_clipf(cpl_coef-1, cpl_coef-1, -1.0f, 1.0f, num_cpl_coefs+4);
+
+        /* scale coupling coefficients from float to 24-bit fixed-point */
+        s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][s->start_freq[CPL_CH]-1],
+                                   cpl_coef-1, num_cpl_coefs+4);
+    }
+
+    /* calculate energy in each band in coupling channel and each fbw channel */
+    /* TODO: possibly use SIMD to speed up energy calculation */
+    bnd = 0;
+    i = s->start_freq[CPL_CH];
+    while (i < s->cpl_end_freq) {
+        int band_size = s->cpl_band_sizes[bnd];
+        for (ch = CPL_CH; ch <= s->fbw_channels; ch++) {
+            for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+                AC3Block *block = &s->blocks[blk];
+                if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch]))
+                    continue;
+                for (j = 0; j < band_size; j++) {
+                    CoefType v = block->mdct_coef[ch][i+j];
+                    MAC_COEF(energy[blk][ch][bnd], v, v);
+                }
+            }
+        }
+        i += band_size;
+        bnd++;
+    }
+
+    /* determine which blocks to send new coupling coordinates for */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block  = &s->blocks[blk];
+        AC3Block *block0 = blk ? &s->blocks[blk-1] : NULL;
+        int new_coords = 0;
+        CoefSumType coord_diff[AC3_MAX_CHANNELS] = {0,};
+
+        if (block->cpl_in_use) {
+            /* calculate coupling coordinates for all blocks and calculate the
+               average difference between coordinates in successive blocks */
+            for (ch = 1; ch <= s->fbw_channels; ch++) {
+                if (!block->channel_in_cpl[ch])
+                    continue;
+
+                for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                    cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy[blk][ch][bnd],
+                                                              energy[blk][CPL_CH][bnd]);
+                    if (blk > 0 && block0->cpl_in_use &&
+                        block0->channel_in_cpl[ch]) {
+                        coord_diff[ch] += fabs(cpl_coords[blk-1][ch][bnd] -
+                                               cpl_coords[blk  ][ch][bnd]);
+                    }
+                }
+                coord_diff[ch] /= s->num_cpl_bands;
+            }
+
+            /* send new coordinates if this is the first block, if previous
+             * block did not use coupling but this block does, the channels
+             * using coupling have changed from the previous block, or the
+             * coordinate difference from the last block for any channel is
+             * greater than a threshold value. */
+            if (blk == 0) {
+                new_coords = 1;
+            } else if (!block0->cpl_in_use) {
+                new_coords = 1;
+            } else {
+                for (ch = 1; ch <= s->fbw_channels; ch++) {
+                    if (block->channel_in_cpl[ch] && !block0->channel_in_cpl[ch]) {
+                        new_coords = 1;
+                        break;
+                    }
+                }
+                if (!new_coords) {
+                    for (ch = 1; ch <= s->fbw_channels; ch++) {
+                        if (block->channel_in_cpl[ch] && coord_diff[ch] > 0.04) {
+                            new_coords = 1;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+        block->new_cpl_coords = new_coords;
+    }
+
+    /* calculate final coupling coordinates, taking into account reusing of
+       coordinates in successive blocks (blk1 is bounds-checked before use) */
+    for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+        blk = 0;
+        while (blk < AC3_MAX_BLOCKS) {
+            int blk1;
+            CoefSumType energy_cpl;
+            AC3Block *block  = &s->blocks[blk];
+
+            if (!block->cpl_in_use) {
+                blk++;
+                continue;
+            }
+
+            energy_cpl = energy[blk][CPL_CH][bnd];
+            blk1 = blk+1;
+            while (blk1 < AC3_MAX_BLOCKS && !s->blocks[blk1].new_cpl_coords) {
+                if (s->blocks[blk1].cpl_in_use)
+                    energy_cpl += energy[blk1][CPL_CH][bnd];
+                blk1++;
+            }
+
+            for (ch = 1; ch <= s->fbw_channels; ch++) {
+                CoefType energy_ch;
+                if (!block->channel_in_cpl[ch])
+                    continue;
+                energy_ch = energy[blk][ch][bnd];
+                blk1 = blk+1;
+                while (blk1 < AC3_MAX_BLOCKS && !s->blocks[blk1].new_cpl_coords) {
+                    if (s->blocks[blk1].cpl_in_use)
+                        energy_ch += energy[blk1][ch][bnd];
+                    blk1++;
+                }
+                cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy_ch, energy_cpl);
+            }
+            blk = blk1;
+        }
+    }
+
+    /* calculate exponents/mantissas for coupling coordinates */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        if (!block->cpl_in_use || !block->new_cpl_coords)
+            continue;
+
+        s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1],
+                                   cpl_coords[blk][1],
+                                   s->fbw_channels * 16);
+        s->ac3dsp.extract_exponents(block->cpl_coord_exp[1],
+                                    fixed_cpl_coords[blk][1],
+                                    s->fbw_channels * 16);
+
+        for (ch = 1; ch <= s->fbw_channels; ch++) {
+            int min_exp, max_exp, master_exp;
+
+            /* determine master exponent */
+            min_exp = max_exp = block->cpl_coord_exp[ch][0];
+            for (bnd = 1; bnd < s->num_cpl_bands; bnd++) {
+                int exp = block->cpl_coord_exp[ch][bnd];
+                min_exp = FFMIN(exp, min_exp);
+                max_exp = FFMAX(exp, max_exp);
+            }
+            master_exp = ((max_exp - 15) + 2) / 3;
+            master_exp = FFMAX(master_exp, 0);
+            while (min_exp < master_exp * 3)
+                master_exp--;
+            for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                block->cpl_coord_exp[ch][bnd] = av_clip(block->cpl_coord_exp[ch][bnd] -
+                                                        master_exp * 3, 0, 15);
+            }
+            block->cpl_master_exp[ch] = master_exp;
+
+            /* quantize mantissas */
+            for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                int cpl_exp  = block->cpl_coord_exp[ch][bnd];
+                int cpl_mant = (fixed_cpl_coords[blk][ch][bnd] << (5 + cpl_exp + master_exp * 3)) >> 24;
+                if (cpl_exp == 15)
+                    cpl_mant >>= 1;
+                else
+                    cpl_mant -= 16;
+
+                block->cpl_coord_mant[ch][bnd] = cpl_mant;
+            }
+        }
+    }
+#endif /* CONFIG_AC3ENC_FLOAT */
+}
+
+
 /**
  * Determine rematrixing flags for each block and band.
  */
@@ -440,23 +793,32 @@ static void compute_rematrixing_strategy(AC3EncodeContext *s)
     if (s->channel_mode != AC3_CHMODE_STEREO)
         return;
 
-    s->num_rematrixing_bands = 4;
-
-    nb_coefs = FFMIN(s->nb_coefs[0], s->nb_coefs[1]);
-
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         block = &s->blocks[blk];
         block->new_rematrixing_strategy = !blk;
-        if (!s->rematrixing_enabled)
+
+        if (!s->rematrixing_enabled) {
+            block0 = block;
             continue;
-        for (bnd = 0; bnd < s->num_rematrixing_bands; bnd++) {
+        }
+
+        block->num_rematrixing_bands = 4;
+        if (block->cpl_in_use) {
+            block->num_rematrixing_bands -= (s->start_freq[CPL_CH] <= 61);
+            block->num_rematrixing_bands -= (s->start_freq[CPL_CH] == 37);
+            if (blk && block->num_rematrixing_bands != block0->num_rematrixing_bands)
+                block->new_rematrixing_strategy = 1;
+        }
+        nb_coefs = FFMIN(block->end_freq[1], block->end_freq[2]);
+
+        for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) {
             /* calculate calculate sum of squared coeffs for one band in one block */
             int start = ff_ac3_rematrix_band_tab[bnd];
             int end   = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]);
             CoefSumType sum[4] = {0,};
             for (i = start; i < end; i++) {
-                CoefType lt = block->mdct_coef[0][i];
-                CoefType rt = block->mdct_coef[1][i];
+                CoefType lt = block->mdct_coef[1][i];
+                CoefType rt = block->mdct_coef[2][i];
                 CoefType md = lt + rt;
                 CoefType sd = lt - rt;
                 MAC_COEF(sum[0], lt, lt);
@@ -495,21 +857,20 @@ static void apply_rematrixing(AC3EncodeContext *s)
     if (!s->rematrixing_enabled)
         return;
 
-    nb_coefs = FFMIN(s->nb_coefs[0], s->nb_coefs[1]);
-
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
         if (block->new_rematrixing_strategy)
             flags = block->rematrixing_flags;
-        for (bnd = 0; bnd < s->num_rematrixing_bands; bnd++) {
+        nb_coefs = FFMIN(block->end_freq[1], block->end_freq[2]);
+        for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) {
             if (flags[bnd]) {
                 start = ff_ac3_rematrix_band_tab[bnd];
                 end   = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]);
                 for (i = start; i < end; i++) {
-                    int32_t lt = block->fixed_coef[0][i];
-                    int32_t rt = block->fixed_coef[1][i];
-                    block->fixed_coef[0][i] = (lt + rt) >> 1;
-                    block->fixed_coef[1][i] = (lt - rt) >> 1;
+                    int32_t lt = block->fixed_coef[1][i];
+                    int32_t rt = block->fixed_coef[2][i];
+                    block->fixed_coef[1][i] = (lt + rt) >> 1;
+                    block->fixed_coef[2][i] = (lt - rt) >> 1;
                 }
             }
         }
@@ -526,12 +887,13 @@ static av_cold void exponent_init(AC3EncodeContext *s)
 
     for (expstr = EXP_D15-1; expstr <= EXP_D45-1; expstr++) {
         grpsize = 3 << expstr;
-        for (i = 73; i < 256; i++) {
-            exponent_group_tab[expstr][i] = (i + grpsize - 4) / grpsize;
+        for (i = 12; i < 256; i++) {
+            exponent_group_tab[0][expstr][i] = (i + grpsize - 4) / grpsize;
+            exponent_group_tab[1][expstr][i] = (i              ) / grpsize;
         }
     }
     /* LFE */
-    exponent_group_tab[0][7] = 2;
+    exponent_group_tab[0][0][7] = 2;
 }
 
 
@@ -544,7 +906,7 @@ static void extract_exponents(AC3EncodeContext *s)
 {
     int blk, ch;
 
-    for (ch = 0; ch < s->channels; ch++) {
+    for (ch = !s->cpl_on; ch <= s->channels; ch++) {
         for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
             AC3Block *block = &s->blocks[blk];
             s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch],
@@ -569,7 +931,7 @@ static void compute_exp_strategy(AC3EncodeContext *s)
 {
     int ch, blk, blk1;
 
-    for (ch = 0; ch < s->fbw_channels; ch++) {
+    for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++) {
         uint8_t *exp_strategy = s->exp_strategy[ch];
         uint8_t *exp          = s->blocks[0].exp[ch];
         int exp_diff;
@@ -578,13 +940,18 @@ static void compute_exp_strategy(AC3EncodeContext *s)
            reused in the next frame */
         exp_strategy[0] = EXP_NEW;
         exp += AC3_MAX_COEFS;
-        for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
-            exp_diff = s->dsp.sad[0](NULL, exp, exp - AC3_MAX_COEFS, 16, 16);
-            if (exp_diff > EXP_DIFF_THRESHOLD)
+        for (blk = 1; blk < AC3_MAX_BLOCKS; blk++, exp += AC3_MAX_COEFS) {
+            if ((ch == CPL_CH && (!s->blocks[blk].cpl_in_use || !s->blocks[blk-1].cpl_in_use)) ||
+                (ch  > CPL_CH && (s->blocks[blk].channel_in_cpl[ch] != s->blocks[blk-1].channel_in_cpl[ch]))) {
+                exp_strategy[blk] = EXP_NEW;
+                continue;
+            }
+            exp_diff = s->dsp.sad[0](NULL, exp, exp - AC3_MAX_COEFS, 16, 16);
+            exp_strategy[blk] = EXP_REUSE;
+            if (ch == CPL_CH && exp_diff > (EXP_DIFF_THRESHOLD * (s->blocks[blk].end_freq[ch] - s->start_freq[ch]) / AC3_MAX_COEFS))
+                exp_strategy[blk] = EXP_NEW;
+            else if (ch > CPL_CH && exp_diff > EXP_DIFF_THRESHOLD)
                 exp_strategy[blk] = EXP_NEW;
-            else
-                exp_strategy[blk] = EXP_REUSE;
-            exp += AC3_MAX_COEFS;
         }
 
         /* now select the encoding strategy type : if exponents are often
@@ -615,25 +982,26 @@ static void compute_exp_strategy(AC3EncodeContext *s)
 /**
  * Update the exponents so that they are the ones the decoder will decode.
  */
-static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy)
+static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy,
+                                    int cpl)
 {
     int nb_groups, i, k;
 
-    nb_groups = exponent_group_tab[exp_strategy-1][nb_exps] * 3;
+    nb_groups = exponent_group_tab[cpl][exp_strategy-1][nb_exps] * 3;
 
     /* for each group, compute the minimum exponent */
     switch(exp_strategy) {
     case EXP_D25:
-        for (i = 1, k = 1; i <= nb_groups; i++) {
+        for (i = 1, k = 1-cpl; i <= nb_groups; i++) {
             uint8_t exp_min = exp[k];
             if (exp[k+1] < exp_min)
                 exp_min = exp[k+1];
-            exp[i] = exp_min;
+            exp[i-cpl] = exp_min;
             k += 2;
         }
         break;
     case EXP_D45:
-        for (i = 1, k = 1; i <= nb_groups; i++) {
+        for (i = 1, k = 1-cpl; i <= nb_groups; i++) {
             uint8_t exp_min = exp[k];
             if (exp[k+1] < exp_min)
                 exp_min = exp[k+1];
@@ -641,14 +1009,14 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy)
                 exp_min = exp[k+2];
             if (exp[k+3] < exp_min)
                 exp_min = exp[k+3];
-            exp[i] = exp_min;
+            exp[i-cpl] = exp_min;
             k += 4;
         }
         break;
     }
 
     /* constraint for DC exponent */
-    if (exp[0] > 15)
+    if (!cpl && exp[0] > 15)
         exp[0] = 15;
 
     /* decrease the delta between each groups to within 2 so that they can be
@@ -659,18 +1027,21 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy)
     while (--i >= 0)
         exp[i] = FFMIN(exp[i], exp[i+1] + 2);
 
+    if (cpl)
+        exp[-1] = exp[0] & ~1;
+
     /* now we have the exponent values the decoder will see */
     switch (exp_strategy) {
     case EXP_D25:
-        for (i = nb_groups, k = nb_groups * 2; i > 0; i--) {
-            uint8_t exp1 = exp[i];
+        for (i = nb_groups, k = (nb_groups * 2)-cpl; i > 0; i--) {
+            uint8_t exp1 = exp[i-cpl];
             exp[k--] = exp1;
             exp[k--] = exp1;
         }
         break;
     case EXP_D45:
-        for (i = nb_groups, k = nb_groups * 4; i > 0; i--) {
-            exp[k] = exp[k-1] = exp[k-2] = exp[k-3] = exp[i];
+        for (i = nb_groups, k = (nb_groups * 4)-cpl; i > 0; i--) {
+            exp[k] = exp[k-1] = exp[k-2] = exp[k-3] = exp[i-cpl];
             k -= 4;
         }
         break;
@@ -686,32 +1057,40 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy)
  */
 static void encode_exponents(AC3EncodeContext *s)
 {
-    int blk, blk1, ch;
+    int blk, blk1, ch, cpl;
     uint8_t *exp, *exp_strategy;
     int nb_coefs, num_reuse_blocks;
 
-    for (ch = 0; ch < s->channels; ch++) {
-        exp          = s->blocks[0].exp[ch];
+    for (ch = !s->cpl_on; ch <= s->channels; ch++) {
+        exp          = s->blocks[0].exp[ch] + s->start_freq[ch];
         exp_strategy = s->exp_strategy[ch];
-        nb_coefs     = s->nb_coefs[ch];
 
+        cpl = (ch == CPL_CH);
         blk = 0;
         while (blk < AC3_MAX_BLOCKS) {
+            AC3Block *block = &s->blocks[blk];
+            if (cpl && !block->cpl_in_use) {
+                exp += AC3_MAX_COEFS;
+                blk++;
+                continue;
+            }
+            nb_coefs = block->end_freq[ch] - s->start_freq[ch];
             blk1 = blk + 1;
 
             /* count the number of EXP_REUSE blocks after the current block
                and set exponent reference block pointers */
-            s->blocks[blk].exp_ref_block[ch] = &s->blocks[blk];
+            block->exp_ref_block[ch] = block;
             while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) {
-                s->blocks[blk1].exp_ref_block[ch] = &s->blocks[blk];
+                s->blocks[blk1].exp_ref_block[ch] = block;
                 blk1++;
             }
             num_reuse_blocks = blk1 - blk - 1;
 
             /* for the EXP_REUSE case we select the min of the exponents */
-            s->ac3dsp.ac3_exponent_min(exp, num_reuse_blocks, nb_coefs);
+            s->ac3dsp.ac3_exponent_min(exp-s->start_freq[ch], num_reuse_blocks,
+                                       AC3_MAX_COEFS);
 
-            encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk]);
+            encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk], cpl);
 
             exp += AC3_MAX_COEFS * (num_reuse_blocks + 1);
             blk = blk1;
@@ -727,7 +1106,7 @@ static void encode_exponents(AC3EncodeContext *s)
  */
 static void group_exponents(AC3EncodeContext *s)
 {
-    int blk, ch, i;
+    int blk, ch, i, cpl;
     int group_size, nb_groups, bit_count;
     uint8_t *p;
     int delta0, delta1, delta2;
@@ -736,14 +1115,15 @@ static void group_exponents(AC3EncodeContext *s)
     bit_count = 0;
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        for (ch = 0; ch < s->channels; ch++) {
+        for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
             int exp_strategy = s->exp_strategy[ch][blk];
             if (exp_strategy == EXP_REUSE)
                 continue;
+            cpl = (ch == CPL_CH);
             group_size = exp_strategy + (exp_strategy == EXP_D45);
-            nb_groups = exponent_group_tab[exp_strategy-1][s->nb_coefs[ch]];
+            nb_groups = exponent_group_tab[cpl][exp_strategy-1][block->end_freq[ch]-s->start_freq[ch]];
             bit_count += 4 + (nb_groups * 7);
-            p = block->exp[ch];
+            p = block->exp[ch] + s->start_freq[ch] - cpl;
 
             /* DC exponent */
             exp1 = *p++;
@@ -810,9 +1190,7 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
 
     /* assumptions:
      *   no dynamic range codes
-     *   no channel coupling
      *   bit allocation parameters do not change between blocks
-     *   SNR offsets do not change between blocks
      *   no delta bit allocation
      *   no skipped data
      *   no auxilliary data
@@ -833,11 +1211,6 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
         /* dynamic range */
         frame_bits++;
 
-        /* coupling strategy */
-        frame_bits++;
-        if (!blk)
-            frame_bits++;
-
         /* exponent strategy */
         frame_bits += 2 * s->fbw_channels;
         if (s->lfe_on)
@@ -848,11 +1221,6 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
         if (!blk)
             frame_bits += 2 + 2 + 2 + 2 + 3;
 
-        /* snr offsets and fast gain codes */
-        frame_bits++;
-        if (!blk)
-            frame_bits += 6 + s->channels * (4 + 3);
-
         /* delta bit allocation */
         frame_bits++;
 
@@ -884,7 +1252,7 @@ static void bit_alloc_init(AC3EncodeContext *s)
     s->slow_gain_code  = 1;
     s->db_per_bit_code = 3;
     s->floor_code      = 7;
-    for (ch = 0; ch < s->channels; ch++)
+    for (ch = 0; ch <= s->channels; ch++)
         s->fast_gain_code[ch] = 4;
 
     /* initial snr offset */
@@ -898,6 +1266,8 @@ static void bit_alloc_init(AC3EncodeContext *s)
     s->bit_alloc.slow_gain  = ff_ac3_slow_gain_tab[s->slow_gain_code];
     s->bit_alloc.db_per_bit = ff_ac3_db_per_bit_tab[s->db_per_bit_code];
     s->bit_alloc.floor      = ff_ac3_floor_tab[s->floor_code];
+    s->bit_alloc.cpl_fast_leak = 0;
+    s->bit_alloc.cpl_slow_leak = 0;
 
     count_frame_bits_fixed(s);
 }
@@ -926,17 +1296,64 @@ static void count_frame_bits(AC3EncodeContext *s)
 
     /* audio blocks */
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+
+        /* coupling strategy */
+        frame_bits++;
+        if (block->new_cpl_strategy) {
+            frame_bits++;
+            if (block->cpl_in_use) {
+                frame_bits += s->fbw_channels;
+                if (s->channel_mode == AC3_CHMODE_STEREO)
+                    frame_bits++;
+                frame_bits += 4 + 4;
+                frame_bits += s->num_cpl_subbands - 1;
+            }
+        }
+
+        /* coupling coordinates */
+        if (block->cpl_in_use) {
+            for (ch = 1; ch <= s->fbw_channels; ch++) {
+                if (block->channel_in_cpl[ch]) {
+                    frame_bits++;
+                    if (block->new_cpl_coords) {
+                        frame_bits += 2;
+                        frame_bits += (4 + 4) * s->num_cpl_bands;
+                    }
+                }
+            }
+        }
+
         /* stereo rematrixing */
         if (s->channel_mode == AC3_CHMODE_STEREO) {
             frame_bits++;
             if (s->blocks[blk].new_rematrixing_strategy)
-                frame_bits += s->num_rematrixing_bands;
+                frame_bits += block->num_rematrixing_bands;
         }
 
         /* bandwidth codes & gain range */
-        for (ch = 0; ch < s->fbw_channels; ch++) {
-            if (s->exp_strategy[ch][blk] != EXP_REUSE)
-                frame_bits += 6 + 2;
+        for (ch = 1; ch <= s->fbw_channels; ch++) {
+            if (s->exp_strategy[ch][blk] != EXP_REUSE) {
+                if (!block->channel_in_cpl[ch])
+                    frame_bits += 6;
+                frame_bits += 2;
+            }
+        }
+
+        /* coupling exponent strategy */
+        if (block->cpl_in_use)
+            frame_bits += 2;
+
+        /* snr offsets and fast gain codes */
+        frame_bits++;
+        if (block->new_snr_offsets)
+            frame_bits += 6 + (s->channels + block->cpl_in_use) * (4 + 3);
+
+        /* coupling leak info */
+        if (block->cpl_in_use) {
+            frame_bits++;
+            if (block->new_cpl_leak)
+                frame_bits += 3 + 3;
         }
     }
 
@@ -970,16 +1387,16 @@ static void bit_alloc_masking(AC3EncodeContext *s)
 
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        for (ch = 0; ch < s->channels; ch++) {
+        for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
             /* We only need psd and mask for calculating bap.
                Since we currently do not calculate bap when exponent
                strategy is EXP_REUSE we do not need to calculate psd or mask. */
             if (s->exp_strategy[ch][blk] != EXP_REUSE) {
-                ff_ac3_bit_alloc_calc_psd(block->exp[ch], 0,
-                                          s->nb_coefs[ch],
-                                          block->psd[ch], block->band_psd[ch]);
+                ff_ac3_bit_alloc_calc_psd(block->exp[ch], s->start_freq[ch],
+                                          block->end_freq[ch], block->psd[ch],
+                                          block->band_psd[ch]);
                 ff_ac3_bit_alloc_calc_mask(&s->bit_alloc, block->band_psd[ch],
-                                           0, s->nb_coefs[ch],
+                                           s->start_freq[ch], block->end_freq[ch],
                                            ff_ac3_fast_gain_tab[s->fast_gain_code[ch]],
                                            ch == s->lfe_channel,
                                            DBA_NONE, 0, NULL, NULL, NULL,
@@ -997,11 +1414,12 @@ static void bit_alloc_masking(AC3EncodeContext *s)
 static void reset_block_bap(AC3EncodeContext *s)
 {
     int blk, ch;
+    int channels = s->channels + 1;
     if (s->blocks[0].bap[0] == s->bap_buffer)
         return;
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-        for (ch = 0; ch < s->channels; ch++) {
-            s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)];
+        for (ch = 0; ch < channels; ch++) {
+            s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * channels + ch)];
         }
     }
 }
@@ -1027,28 +1445,37 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset)
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
         AC3Block *ref_block;
+        int av_uninit(ch0);
+        int got_cpl = !block->cpl_in_use;
         // initialize grouped mantissa counts. these are set so that they are
         // padded to the next whole group size when bits are counted in
         // compute_mantissa_size_final
         mant_cnt[0] = mant_cnt[3] = 0;
         mant_cnt[1] = mant_cnt[2] = 2;
         mant_cnt[4] = 1;
-        for (ch = 0; ch < s->channels; ch++) {
+        for (ch = 1; ch <= s->channels; ch++) {
+            if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
+                ch0     = ch - 1;
+                ch      = CPL_CH;
+                got_cpl = 1;
+            }
+
             /* Currently the only bit allocation parameters which vary across
                blocks within a frame are the exponent values.  We can take
                advantage of that by reusing the bit allocation pointers
                whenever we reuse exponents. */
             ref_block = block->exp_ref_block[ch];
             if (s->exp_strategy[ch][blk] != EXP_REUSE) {
-                s->ac3dsp.bit_alloc_calc_bap(ref_block->mask[ch],
-                                             ref_block->psd[ch], 0,
-                                             s->nb_coefs[ch], snr_offset,
-                                             s->bit_alloc.floor, ff_ac3_bap_tab,
-                                             ref_block->bap[ch]);
+                s->ac3dsp.bit_alloc_calc_bap(ref_block->mask[ch], ref_block->psd[ch],
+                                             s->start_freq[ch], block->end_freq[ch],
+                                             snr_offset, s->bit_alloc.floor,
+                                             ff_ac3_bap_tab, ref_block->bap[ch]);
             }
             mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt,
-                                                             ref_block->bap[ch],
-                                                             s->nb_coefs[ch]);
+                                                             ref_block->bap[ch]+s->start_freq[ch],
+                                                             block->end_freq[ch]-s->start_freq[ch]);
+            if (ch == CPL_CH)
+                ch = ch0;
         }
         mantissa_bits += compute_mantissa_size_final(mant_cnt);
     }
@@ -1074,7 +1501,7 @@ static int cbr_bit_allocation(AC3EncodeContext *s)
 
     /* if previous frame SNR offset was 1023, check if current frame can also
        use SNR offset of 1023. if so, skip the search. */
-    if ((snr_offset | s->fine_snr_offset[0]) == 1023) {
+    if ((snr_offset | s->fine_snr_offset[1]) == 1023) {
         if (bit_alloc(s, 1023) <= bits_left)
             return 0;
     }
@@ -1098,7 +1525,7 @@ static int cbr_bit_allocation(AC3EncodeContext *s)
     reset_block_bap(s);
 
     s->coarse_snr_offset = snr_offset >> 4;
-    for (ch = 0; ch < s->channels; ch++)
+    for (ch = !s->cpl_on; ch <= s->channels; ch++)
         s->fine_snr_offset[ch] = snr_offset & 0xF;
 
     return 0;
@@ -1116,26 +1543,26 @@ static int downgrade_exponents(AC3EncodeContext *s)
 {
     int ch, blk;
 
-    for (ch = 0; ch < s->fbw_channels; ch++) {
-        for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
+    for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
+        for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) {
             if (s->exp_strategy[ch][blk] == EXP_D15) {
                 s->exp_strategy[ch][blk] = EXP_D25;
                 return 0;
             }
         }
     }
-    for (ch = 0; ch < s->fbw_channels; ch++) {
-        for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
+    for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
+        for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) {
             if (s->exp_strategy[ch][blk] == EXP_D25) {
                 s->exp_strategy[ch][blk] = EXP_D45;
                 return 0;
             }
         }
     }
-    for (ch = 0; ch < s->fbw_channels; ch++) {
-        /* block 0 cannot reuse exponents, so only downgrade D45 to REUSE if
-           the block number > 0 */
-        for (blk = AC3_MAX_BLOCKS-1; blk > 0; blk--) {
+    /* block 0 cannot reuse exponents, so only downgrade D45 to REUSE if
+       the block number > 0 */
+    for (blk = AC3_MAX_BLOCKS-1; blk > 0; blk--) {
+        for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) {
             if (s->exp_strategy[ch][blk] > EXP_REUSE) {
                 s->exp_strategy[ch][blk] = EXP_REUSE;
                 return 0;
@@ -1162,7 +1589,18 @@ static int compute_bit_allocation(AC3EncodeContext *s)
 
     ret = cbr_bit_allocation(s);
     while (ret) {
-        /* fallback 1: downgrade exponents */
+        /* fallback 1: disable channel coupling */
+        if (s->cpl_on) {
+            s->cpl_on = 0;
+            compute_coupling_strategy(s);
+            compute_rematrixing_strategy(s);
+            apply_rematrixing(s);
+            process_exponents(s);
+            ret = compute_bit_allocation(s);
+            continue;
+        }
+
+        /* fallback 2: downgrade exponents */
         if (!downgrade_exponents(s)) {
             extract_exponents(s);
             encode_exponents(s);
@@ -1216,12 +1654,13 @@ static inline int asym_quant(int c, int e, int qbits)
  * Quantize a set of mantissas for a single channel in a single block.
  */
 static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef,
-                                      uint8_t *exp,
-                                      uint8_t *bap, uint16_t *qmant, int n)
+                                      uint8_t *exp, uint8_t *bap,
+                                      uint16_t *qmant, int start_freq,
+                                      int end_freq)
 {
     int i;
 
-    for (i = 0; i < n; i++) {
+    for (i = start_freq; i < end_freq; i++) {
         int v;
         int c = fixed_coef[i];
         int e = exp[i];
@@ -1311,19 +1750,27 @@ static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef,
  */
 static void quantize_mantissas(AC3EncodeContext *s)
 {
-    int blk, ch;
-
+    int blk, ch, ch0=0, got_cpl;
 
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
         AC3Block *ref_block;
         AC3Mant m = { 0 };
 
-        for (ch = 0; ch < s->channels; ch++) {
+        got_cpl = !block->cpl_in_use;
+        for (ch = 1; ch <= s->channels; ch++) {
+            if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
+                ch0     = ch - 1;
+                ch      = CPL_CH;
+                got_cpl = 1;
+            }
             ref_block = block->exp_ref_block[ch];
             quantize_mantissas_blk_ch(&m, block->fixed_coef[ch],
-                                      ref_block->exp[ch], ref_block->bap[ch],
-                                      block->qmant[ch], s->nb_coefs[ch]);
+                                      ref_block->exp[ch],
+                                      ref_block->bap[ch], block->qmant[ch],
+                                      s->start_freq[ch], block->end_freq[ch]);
+            if (ch == CPL_CH)
+                ch = ch0;
         }
     }
 }
@@ -1390,7 +1837,8 @@ static void output_frame_header(AC3EncodeContext *s)
  */
 static void output_audio_block(AC3EncodeContext *s, int blk)
 {
-    int ch, i, baie, rbnd;
+    int ch, i, baie, bnd, got_cpl;
+    int av_uninit(ch0);
     AC3Block *block = &s->blocks[blk];
 
     /* block switching */
@@ -1405,11 +1853,38 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     put_bits(&s->pb, 1, 0);
 
     /* channel coupling */
-    if (!blk) {
-        put_bits(&s->pb, 1, 1); /* coupling strategy present */
-        put_bits(&s->pb, 1, 0); /* no coupling strategy */
-    } else {
-        put_bits(&s->pb, 1, 0); /* no new coupling strategy */
+    put_bits(&s->pb, 1, block->new_cpl_strategy);
+    if (block->new_cpl_strategy) {
+        put_bits(&s->pb, 1, block->cpl_in_use);
+        if (block->cpl_in_use) {
+            int start_sub, end_sub;
+            for (ch = 1; ch <= s->fbw_channels; ch++)
+                put_bits(&s->pb, 1, block->channel_in_cpl[ch]);
+            if (s->channel_mode == AC3_CHMODE_STEREO)
+                put_bits(&s->pb, 1, 0); /* phase flags in use */
+            start_sub = (s->start_freq[CPL_CH] - 37) / 12;
+            end_sub   = (s->cpl_end_freq       - 37) / 12;
+            put_bits(&s->pb, 4, start_sub);
+            put_bits(&s->pb, 4, end_sub - 3);
+            for (bnd = start_sub+1; bnd < end_sub; bnd++)
+                put_bits(&s->pb, 1, ff_eac3_default_cpl_band_struct[bnd]);
+        }
+    }
+
+    /* coupling coordinates */
+    if (block->cpl_in_use) {
+        for (ch = 1; ch <= s->fbw_channels; ch++) {
+            if (block->channel_in_cpl[ch]) {
+                put_bits(&s->pb, 1, block->new_cpl_coords);
+                if (block->new_cpl_coords) {
+                    put_bits(&s->pb, 2, block->cpl_master_exp[ch]);
+                    for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                        put_bits(&s->pb, 4, block->cpl_coord_exp [ch][bnd]);
+                        put_bits(&s->pb, 4, block->cpl_coord_mant[ch][bnd]);
+                    }
+                }
+            }
+        }
     }
 
     /* stereo rematrixing */
@@ -1417,40 +1892,41 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
         put_bits(&s->pb, 1, block->new_rematrixing_strategy);
         if (block->new_rematrixing_strategy) {
             /* rematrixing flags */
-            for (rbnd = 0; rbnd < s->num_rematrixing_bands; rbnd++)
-                put_bits(&s->pb, 1, block->rematrixing_flags[rbnd]);
+            for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++)
+                put_bits(&s->pb, 1, block->rematrixing_flags[bnd]);
         }
     }
 
     /* exponent strategy */
-    for (ch = 0; ch < s->fbw_channels; ch++)
+    for (ch = !block->cpl_in_use; ch <= s->fbw_channels; ch++)
         put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
     if (s->lfe_on)
         put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
 
     /* bandwidth */
-    for (ch = 0; ch < s->fbw_channels; ch++) {
-        if (s->exp_strategy[ch][blk] != EXP_REUSE)
+    for (ch = 1; ch <= s->fbw_channels; ch++) {
+        if (s->exp_strategy[ch][blk] != EXP_REUSE && !block->channel_in_cpl[ch])
             put_bits(&s->pb, 6, s->bandwidth_code);
     }
 
     /* exponents */
-    for (ch = 0; ch < s->channels; ch++) {
+    for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
         int nb_groups;
+        int cpl = (ch == CPL_CH);
 
         if (s->exp_strategy[ch][blk] == EXP_REUSE)
             continue;
 
         /* DC exponent */
-        put_bits(&s->pb, 4, block->grouped_exp[ch][0]);
+        put_bits(&s->pb, 4, block->grouped_exp[ch][0] >> cpl);
 
         /* exponent groups */
-        nb_groups = exponent_group_tab[s->exp_strategy[ch][blk]-1][s->nb_coefs[ch]];
+        nb_groups = exponent_group_tab[cpl][s->exp_strategy[ch][blk]-1][block->end_freq[ch]-s->start_freq[ch]];
         for (i = 1; i <= nb_groups; i++)
             put_bits(&s->pb, 7, block->grouped_exp[ch][i]);
 
         /* gain range info */
-        if (ch != s->lfe_channel)
+        if (ch != s->lfe_channel && !cpl)
             put_bits(&s->pb, 2, 0);
     }
 
@@ -1466,23 +1942,40 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     }
 
     /* snr offset */
-    put_bits(&s->pb, 1, baie);
-    if (baie) {
+    put_bits(&s->pb, 1, block->new_snr_offsets);
+    if (block->new_snr_offsets) {
         put_bits(&s->pb, 6, s->coarse_snr_offset);
-        for (ch = 0; ch < s->channels; ch++) {
+        for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
             put_bits(&s->pb, 4, s->fine_snr_offset[ch]);
             put_bits(&s->pb, 3, s->fast_gain_code[ch]);
         }
     }
 
+    /* coupling leak */
+    if (block->cpl_in_use) {
+        put_bits(&s->pb, 1, block->new_cpl_leak);
+        if (block->new_cpl_leak) {
+            put_bits(&s->pb, 3, s->bit_alloc.cpl_fast_leak);
+            put_bits(&s->pb, 3, s->bit_alloc.cpl_slow_leak);
+        }
+    }
+
     put_bits(&s->pb, 1, 0); /* no delta bit allocation */
     put_bits(&s->pb, 1, 0); /* no data to skip */
 
     /* mantissas */
-    for (ch = 0; ch < s->channels; ch++) {
+    got_cpl = !block->cpl_in_use;
+    for (ch = 1; ch <= s->channels; ch++) {
         int b, q;
-        AC3Block *ref_block = block->exp_ref_block[ch];
-        for (i = 0; i < s->nb_coefs[ch]; i++) {
+        AC3Block *ref_block;
+
+        if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
+            ch0     = ch - 1;
+            ch      = CPL_CH;
+            got_cpl = 1;
+        }
+        ref_block = block->exp_ref_block[ch];
+        for (i = s->start_freq[ch]; i < block->end_freq[ch]; i++) {
             q = block->qmant[ch][i];
             b = ref_block->bap[ch][i];
             switch (b) {
@@ -1496,6 +1989,8 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
             default:              put_bits(&s->pb, b-1, q); break;
             }
         }
+        if (ch == CPL_CH)
+            ch = ch0;
     }
 }
 
@@ -1881,6 +2376,12 @@ static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,
 
     scale_coefficients(s);
 
+    s->cpl_on = s->cpl_enabled;
+    compute_coupling_strategy(s);
+
+    if (s->cpl_on)
+        apply_channel_coupling(s);
+
     compute_rematrixing_strategy(s);
 
     apply_rematrixing(s);
@@ -1961,7 +2462,7 @@ static av_cold int set_channel_info(AC3EncodeContext *s, int channels,
     s->lfe_on       = !!(ch_layout & AV_CH_LOW_FREQUENCY);
     s->channels     = channels;
     s->fbw_channels = channels - s->lfe_on;
-    s->lfe_channel  = s->lfe_on ? s->fbw_channels : -1;
+    s->lfe_channel  = s->lfe_on ? s->fbw_channels + 1 : -1;
     if (s->lfe_on)
         ch_layout -= AV_CH_LOW_FREQUENCY;
 
@@ -2060,6 +2561,10 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
     s->rematrixing_enabled = s->options.stereo_rematrixing &&
                              (s->channel_mode == AC3_CHMODE_STEREO);
 
+    s->cpl_enabled = s->options.channel_coupling &&
+                     s->channel_mode >= AC3_CHMODE_STEREO &&
+                     CONFIG_AC3ENC_FLOAT;
+
     return 0;
 }
 
@@ -2071,7 +2576,8 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
  */
 static av_cold void set_bandwidth(AC3EncodeContext *s)
 {
-    int ch;
+    int blk, ch;
+    int av_uninit(cpl_start);
 
     if (s->cutoff) {
         /* calculate bandwidth based on user-specified cutoff frequency */
@@ -2084,11 +2590,54 @@ static av_cold void set_bandwidth(AC3EncodeContext *s)
     }
 
     /* set number of coefficients for each channel */
-    for (ch = 0; ch < s->fbw_channels; ch++) {
-        s->nb_coefs[ch] = s->bandwidth_code * 3 + 73;
+    for (ch = 1; ch <= s->fbw_channels; ch++) {
+        s->start_freq[ch] = 0;
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            s->blocks[blk].end_freq[ch] = s->bandwidth_code * 3 + 73;
+    }
+    /* LFE channel always has 7 coefs */
+    if (s->lfe_on) {
+        s->start_freq[s->lfe_channel] = 0;
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            s->blocks[blk].end_freq[ch] = 7;
+    }
+
+    /* initialize coupling strategy */
+    if (s->cpl_enabled) {
+        if (s->options.cpl_start >= 0) {
+            cpl_start = s->options.cpl_start;
+        } else {
+            cpl_start = ac3_coupling_start_tab[s->channel_mode-2][s->bit_alloc.sr_code][s->frame_size_code/2];
+            if (cpl_start < 0)
+                s->cpl_enabled = 0;
+        }
+    }
+    if (s->cpl_enabled) {
+        int i, cpl_start_band, cpl_end_band;
+        uint8_t *cpl_band_sizes = s->cpl_band_sizes;
+
+        cpl_end_band   = s->bandwidth_code / 4 + 3;
+        cpl_start_band = av_clip(cpl_start, 0, FFMIN(cpl_end_band-1, 15));
+
+        s->num_cpl_subbands = cpl_end_band - cpl_start_band;
+
+        s->num_cpl_bands = 1;
+        *cpl_band_sizes  = 12;
+        for (i = cpl_start_band + 1; i < cpl_end_band; i++) {
+            if (ff_eac3_default_cpl_band_struct[i]) {
+                *cpl_band_sizes += 12;
+            } else {
+                s->num_cpl_bands++;
+                cpl_band_sizes++;
+                *cpl_band_sizes = 12;
+            }
+        }
+
+        s->start_freq[CPL_CH] = cpl_start_band * 12 + 37;
+        s->cpl_end_freq       = cpl_end_band   * 12 + 37;
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            s->blocks[blk].end_freq[CPL_CH] = s->cpl_end_freq;
     }
-    if (s->lfe_on)
-        s->nb_coefs[s->lfe_channel] = 7; /* LFE channel always has 7 coefs */
 }
 
 
@@ -2096,6 +2645,7 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
 {
     int blk, ch;
     AC3EncodeContext *s = avctx->priv_data;
+    int channels = s->channels + 1; /* includes coupling channel */
 
     FF_ALLOC_OR_GOTO(avctx, s->planar_samples, s->channels * sizeof(*s->planar_samples),
                      alloc_fail);
@@ -2104,74 +2654,90 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
                           (AC3_FRAME_SIZE+AC3_BLOCK_SIZE) * sizeof(**s->planar_samples),
                           alloc_fail);
     }
-    FF_ALLOC_OR_GOTO(avctx, s->bap_buffer,  AC3_MAX_BLOCKS * s->channels *
+    FF_ALLOC_OR_GOTO(avctx, s->bap_buffer,  AC3_MAX_BLOCKS * channels *
                      AC3_MAX_COEFS * sizeof(*s->bap_buffer),  alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * s->channels *
+    FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * channels *
                      AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * s->channels *
+    FF_ALLOC_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * channels *
                      AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * s->channels *
+    FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * channels *
                      AC3_MAX_COEFS * sizeof(*s->exp_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * s->channels *
+    FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * channels *
                      128 * sizeof(*s->grouped_exp_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * s->channels *
+    FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * channels *
                      AC3_MAX_COEFS * sizeof(*s->psd_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * s->channels *
+    FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * channels *
                      64 * sizeof(*s->band_psd_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * s->channels *
+    FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * channels *
                      64 * sizeof(*s->mask_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * s->channels *
+    FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * channels *
                      AC3_MAX_COEFS * sizeof(*s->qmant_buffer), alloc_fail);
+    if (s->cpl_enabled) {
+        FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, AC3_MAX_BLOCKS * channels *
+                         16 * sizeof(*s->cpl_coord_exp_buffer), alloc_fail);
+        FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, AC3_MAX_BLOCKS * channels *
+                         16 * sizeof(*s->cpl_coord_mant_buffer), alloc_fail);
+    }
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        FF_ALLOC_OR_GOTO(avctx, block->bap, s->channels * sizeof(*block->bap),
+        FF_ALLOC_OR_GOTO(avctx, block->bap, channels * sizeof(*block->bap),
                          alloc_fail);
-        FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, s->channels * sizeof(*block->mdct_coef),
+        FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, channels * sizeof(*block->mdct_coef),
                           alloc_fail);
-        FF_ALLOCZ_OR_GOTO(avctx, block->exp, s->channels * sizeof(*block->exp),
+        FF_ALLOCZ_OR_GOTO(avctx, block->exp, channels * sizeof(*block->exp),
                           alloc_fail);
-        FF_ALLOCZ_OR_GOTO(avctx, block->grouped_exp, s->channels * sizeof(*block->grouped_exp),
+        FF_ALLOCZ_OR_GOTO(avctx, block->grouped_exp, channels * sizeof(*block->grouped_exp),
                           alloc_fail);
-        FF_ALLOCZ_OR_GOTO(avctx, block->psd, s->channels * sizeof(*block->psd),
+        FF_ALLOCZ_OR_GOTO(avctx, block->psd, channels * sizeof(*block->psd),
                           alloc_fail);
-        FF_ALLOCZ_OR_GOTO(avctx, block->band_psd, s->channels * sizeof(*block->band_psd),
+        FF_ALLOCZ_OR_GOTO(avctx, block->band_psd, channels * sizeof(*block->band_psd),
                           alloc_fail);
-        FF_ALLOCZ_OR_GOTO(avctx, block->mask, s->channels * sizeof(*block->mask),
+        FF_ALLOCZ_OR_GOTO(avctx, block->mask, channels * sizeof(*block->mask),
                           alloc_fail);
-        FF_ALLOCZ_OR_GOTO(avctx, block->qmant, s->channels * sizeof(*block->qmant),
+        FF_ALLOCZ_OR_GOTO(avctx, block->qmant, channels * sizeof(*block->qmant),
                           alloc_fail);
+        if (s->cpl_enabled) {
+            FF_ALLOCZ_OR_GOTO(avctx, block->cpl_coord_exp, channels * sizeof(*block->cpl_coord_exp),
+                              alloc_fail);
+            FF_ALLOCZ_OR_GOTO(avctx, block->cpl_coord_mant, channels * sizeof(*block->cpl_coord_mant),
+                              alloc_fail);
+        }
 
-        for (ch = 0; ch < s->channels; ch++) {
+        for (ch = 0; ch < channels; ch++) {
             /* arrangement: block, channel, coeff */
-            block->bap[ch]         = &s->bap_buffer        [AC3_MAX_COEFS * (blk * s->channels + ch)];
-            block->mdct_coef[ch]   = &s->mdct_coef_buffer  [AC3_MAX_COEFS * (blk * s->channels + ch)];
-            block->grouped_exp[ch] = &s->grouped_exp_buffer[128           * (blk * s->channels + ch)];
-            block->psd[ch]         = &s->psd_buffer        [AC3_MAX_COEFS * (blk * s->channels + ch)];
-            block->band_psd[ch]    = &s->band_psd_buffer   [64            * (blk * s->channels + ch)];
-            block->mask[ch]        = &s->mask_buffer       [64            * (blk * s->channels + ch)];
-            block->qmant[ch]       = &s->qmant_buffer      [AC3_MAX_COEFS * (blk * s->channels + ch)];
+            block->bap[ch]         = &s->bap_buffer        [AC3_MAX_COEFS * (blk * channels + ch)];
+            block->grouped_exp[ch] = &s->grouped_exp_buffer[128           * (blk * channels + ch)];
+            block->psd[ch]         = &s->psd_buffer        [AC3_MAX_COEFS * (blk * channels + ch)];
+            block->band_psd[ch]    = &s->band_psd_buffer   [64            * (blk * channels + ch)];
+            block->mask[ch]        = &s->mask_buffer       [64            * (blk * channels + ch)];
+            block->qmant[ch]       = &s->qmant_buffer      [AC3_MAX_COEFS * (blk * channels + ch)];
+            if (s->cpl_enabled) {
+                block->cpl_coord_exp[ch]  = &s->cpl_coord_exp_buffer [16  * (blk * channels + ch)];
+                block->cpl_coord_mant[ch] = &s->cpl_coord_mant_buffer[16  * (blk * channels + ch)];
+            }
 
             /* arrangement: channel, block, coeff */
             block->exp[ch]         = &s->exp_buffer        [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
+            block->mdct_coef[ch]   = &s->mdct_coef_buffer  [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
         }
     }
 
     if (CONFIG_AC3ENC_FLOAT) {
-        FF_ALLOC_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * s->channels *
+        FF_ALLOC_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels *
                          AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail);
         for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
             AC3Block *block = &s->blocks[blk];
-            FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, s->channels *
+            FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *
                               sizeof(*block->fixed_coef), alloc_fail);
-            for (ch = 0; ch < s->channels; ch++)
-                block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)];
+            for (ch = 0; ch < channels; ch++)
+                block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
         }
     } else {
         for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
             AC3Block *block = &s->blocks[blk];
-            FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, s->channels *
+            FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *
                               sizeof(*block->fixed_coef), alloc_fail);
-            for (ch = 0; ch < s->channels; ch++)
+            for (ch = 0; ch < channels; ch++)
                 block->fixed_coef[ch] = (int32_t *)block->mdct_coef[ch];
         }
     }
diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c
index 800ef8f92c..035ebb3de9 100644
--- a/libavcodec/ac3enc_fixed.c
+++ b/libavcodec/ac3enc_fixed.c
@@ -101,7 +101,7 @@ static void scale_coefficients(AC3EncodeContext *s)
 
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        for (ch = 0; ch < s->channels; ch++) {
+        for (ch = 1; ch <= s->channels; ch++) {
             s->ac3dsp.ac3_rshift_int32(block->mdct_coef[ch], AC3_MAX_COEFS,
                                        block->coeff_shift[ch]);
         }
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index d2435dee15..4f61440b52 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -93,8 +93,10 @@ static int normalize_samples(AC3EncodeContext *s)
  */
 static void scale_coefficients(AC3EncodeContext *s)
 {
-    s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer, s->mdct_coef_buffer,
-                               AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels);
+    int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS;
+    s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer + chan_size,
+                               s->mdct_coef_buffer  + chan_size,
+                               chan_size * s->channels);
 }
 
 
diff --git a/libavcodec/ac3tab.c b/libavcodec/ac3tab.c
index 6a4d8cd0a2..7df3d828fb 100644
--- a/libavcodec/ac3tab.c
+++ b/libavcodec/ac3tab.c
@@ -138,6 +138,13 @@ const uint16_t ff_ac3_bitrate_tab[19] = {
  */
 const uint8_t ff_ac3_rematrix_band_tab[5] = { 13, 25, 37, 61, 253 };
 
+/**
+ * Table E2.16 Default Coupling Banding Structure
+ */
+const uint8_t ff_eac3_default_cpl_band_struct[18] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1
+};
+
 /* AC-3 MDCT window */
 
 /* MDCT window */
diff --git a/libavcodec/ac3tab.h b/libavcodec/ac3tab.h
index 292ce0d32f..e5cd368bb7 100644
--- a/libavcodec/ac3tab.h
+++ b/libavcodec/ac3tab.h
@@ -39,6 +39,7 @@ extern const uint8_t  ff_ac3_dec_channel_map[8][2][6];
 extern const uint16_t ff_ac3_sample_rate_tab[3];
 extern const uint16_t ff_ac3_bitrate_tab[19];
 extern const uint8_t  ff_ac3_rematrix_band_tab[5];
+extern const uint8_t  ff_eac3_default_cpl_band_struct[18];
 extern const int16_t  ff_ac3_window[AC3_WINDOW_SIZE/2];
 extern const uint8_t  ff_ac3_log_add_tab[260];
 extern const uint16_t ff_ac3_hearing_threshold_tab[AC3_CRITICAL_BANDS][3];

From d42dce7bb35927b5e93ba7569d9e8dba38ab95a1 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Tue, 24 May 2011 10:08:55 +0200
Subject: [PATCH 226/830] Remove avconfig.h and INCINSTDIRs on uninstall.

---
 subdir.mak | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/subdir.mak b/subdir.mak
index 8a407fe164..33546fd8fd 100644
--- a/subdir.mak
+++ b/subdir.mak
@@ -88,9 +88,9 @@ uninstall-libs::
 	-$(RM) "$(LIBDIR)/$(LIBNAME)"
 
 uninstall-headers::
-	$(RM) $(addprefix "$(INCINSTDIR)/",$(HEADERS))
+	$(RM) $(addprefix "$(INCINSTDIR)/",$(HEADERS)) $(addprefix "$(INCINSTDIR)/",$(BUILT_HEADERS))
 	$(RM) "$(LIBDIR)/pkgconfig/lib$(NAME).pc"
-	-rmdir "$(INCDIR)"
+	-rmdir "$(INCINSTDIR)"
 endef
 
 $(eval $(RULES))

From 57aa765971878ff678abbe3bf1bfd9407b9e5a00 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 13 May 2011 08:03:36 +0200
Subject: [PATCH 227/830] lavc: remove msmpeg4v1 encoder.

The encoder has never produced files that could be decoded
with any software and there should be no reason to create
such files anyway.
---
 libavcodec/Makefile        |  2 --
 libavcodec/allcodecs.c     |  2 +-
 libavcodec/mpegvideo_enc.c | 21 ---------------------
 libavcodec/msmpeg4.c       |  8 --------
 libavcodec/msmpeg4.h       |  3 +--
 5 files changed, 2 insertions(+), 34 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 6cb59a3c43..81981d76b6 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -262,8 +262,6 @@ OBJS-$(CONFIG_MPEG2VIDEO_ENCODER)      += mpeg12enc.o mpegvideo_enc.o \
                                           mpegvideo.o error_resilience.o
 OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL)     += vaapi_mpeg4.o
 OBJS-$(CONFIG_MSMPEG4V1_DECODER)       += msmpeg4.o msmpeg4data.o
-OBJS-$(CONFIG_MSMPEG4V1_ENCODER)       += msmpeg4.o msmpeg4data.o h263dec.o \
-                                          h263.o ituh263dec.o mpeg4videodec.o
 OBJS-$(CONFIG_MSMPEG4V2_DECODER)       += msmpeg4.o msmpeg4data.o h263dec.o \
                                           h263.o ituh263dec.o mpeg4videodec.o
 OBJS-$(CONFIG_MSMPEG4V2_ENCODER)       += msmpeg4.o msmpeg4data.o h263dec.o \
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 3466ad94fd..f063369dfd 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -147,7 +147,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER (MPEGVIDEO, mpegvideo);
     REGISTER_DECODER (MPEG_VDPAU, mpeg_vdpau);
     REGISTER_DECODER (MPEG1_VDPAU, mpeg1_vdpau);
-    REGISTER_ENCDEC  (MSMPEG4V1, msmpeg4v1);
+    REGISTER_DECODER (MSMPEG4V1, msmpeg4v1);
     REGISTER_ENCDEC  (MSMPEG4V2, msmpeg4v2);
     REGISTER_ENCDEC  (MSMPEG4V3, msmpeg4v3);
     REGISTER_DECODER (MSRLE, msrle);
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 44ecba2524..8df05c4c7a 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -637,15 +637,6 @@ av_cold int MPV_encode_init(AVCodecContext *avctx)
         s->low_delay= s->max_b_frames ? 0 : 1;
         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
         break;
-    case CODEC_ID_MSMPEG4V1:
-        s->out_format = FMT_H263;
-        s->h263_msmpeg4 = 1;
-        s->h263_pred = 1;
-        s->unrestricted_mv = 1;
-        s->msmpeg4_version= 1;
-        avctx->delay=0;
-        s->low_delay=1;
-        break;
     case CODEC_ID_MSMPEG4V2:
         s->out_format = FMT_H263;
         s->h263_msmpeg4 = 1;
@@ -3805,18 +3796,6 @@ AVCodec ff_h263p_encoder = {
     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
 };
 
-AVCodec ff_msmpeg4v1_encoder = {
-    "msmpeg4v1",
-    AVMEDIA_TYPE_VIDEO,
-    CODEC_ID_MSMPEG4V1,
-    sizeof(MpegEncContext),
-    MPV_encode_init,
-    MPV_encode_picture,
-    MPV_encode_end,
-    .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
-    .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 1"),
-};
-
 AVCodec ff_msmpeg4v2_encoder = {
     "msmpeg4v2",
     AVMEDIA_TYPE_VIDEO,
diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c
index 6c9c2096a2..84ba1fa76a 100644
--- a/libavcodec/msmpeg4.c
+++ b/libavcodec/msmpeg4.c
@@ -846,13 +846,6 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
     int pred, extquant;
     int extrabits = 0;
 
-    if(s->msmpeg4_version==1){
-        int32_t *dc_val;
-        pred = msmpeg4v1_pred_dc(s, n, &dc_val);
-
-        /* update predictor */
-        *dc_val= level;
-    }else{
         int16_t *dc_val;
         pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
 
@@ -862,7 +855,6 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
         } else {
             *dc_val = level * s->c_dc_scale;
         }
-    }
 
     /* do the prediction */
     level -= pred;
diff --git a/libavcodec/msmpeg4.h b/libavcodec/msmpeg4.h
index 0570bf9feb..8a0a066e48 100644
--- a/libavcodec/msmpeg4.h
+++ b/libavcodec/msmpeg4.h
@@ -54,8 +54,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
                                 CONFIG_MSMPEG4V3_DECODER || \
                                 CONFIG_WMV2_DECODER      || \
                                 CONFIG_VC1_DECODER)
-#define CONFIG_MSMPEG4_ENCODER (CONFIG_MSMPEG4V1_ENCODER || \
-                                CONFIG_MSMPEG4V2_ENCODER || \
+#define CONFIG_MSMPEG4_ENCODER (CONFIG_MSMPEG4V2_ENCODER || \
                                 CONFIG_MSMPEG4V3_ENCODER || \
                                 CONFIG_WMV2_ENCODER)
 

From b2893ee2f8b204f3d636c25a05d1dc1dd81dfdba Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 21 May 2011 12:23:34 +0200
Subject: [PATCH 228/830] msmpeg4: reindent.

---
 libavcodec/msmpeg4.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c
index 84ba1fa76a..c575a2f206 100644
--- a/libavcodec/msmpeg4.c
+++ b/libavcodec/msmpeg4.c
@@ -846,15 +846,15 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
     int pred, extquant;
     int extrabits = 0;
 
-        int16_t *dc_val;
-        pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
+    int16_t *dc_val;
+    pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
 
-        /* update predictor */
-        if (n < 4) {
-            *dc_val = level * s->y_dc_scale;
-        } else {
-            *dc_val = level * s->c_dc_scale;
-        }
+    /* update predictor */
+    if (n < 4) {
+        *dc_val = level * s->y_dc_scale;
+    } else {
+        *dc_val = level * s->c_dc_scale;
+    }
 
     /* do the prediction */
     level -= pred;

From e9f4001a30c563a840614048629055769f6f7020 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 24 May 2011 13:18:31 +0200
Subject: [PATCH 229/830] wav: propagate ff_get_wav_header() error code in
 w64_read_header()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also fix warning:
wav.c: In function ‘w64_read_header’:
wav.c:546: warning: ‘ret’ may be used uninitialized in this function
---
 libavformat/wav.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index 9f7b3c75f9..a1f9ff109d 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -194,7 +194,7 @@ static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st)
     if (!*st)
         return AVERROR(ENOMEM);
 
-    ff_get_wav_header(pb, (*st)->codec, size);
+    ret = ff_get_wav_header(pb, (*st)->codec, size);
     if (ret < 0)
         return ret;
     (*st)->need_parsing = AVSTREAM_PARSE_FULL;

From 8d953175d4f7b2f5ea97489b5ba6d170b6ca461f Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 24 May 2011 13:29:33 +0200
Subject: [PATCH 230/830] wav: fix various printf warnings related to wrong
 argument type

---
 libavformat/wav.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index a1f9ff109d..baa71561d2 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -235,7 +235,7 @@ static int wav_parse_bext_tag(AVFormatContext *s, int64_t size)
         return ret;
 
     time_reference = avio_rl64(s->pb);
-    snprintf(temp, sizeof(temp), "%lu", time_reference);
+    snprintf(temp, sizeof(temp), "%"PRIu64, time_reference);
     if ((ret = av_metadata_set2(&s->metadata, "time_reference", temp, 0)) < 0)
         return ret;
 
@@ -248,11 +248,12 @@ static int wav_parse_bext_tag(AVFormatContext *s, int64_t size)
             /* the string formatting below is per SMPTE 330M-2004 Annex C */
             if (umid_parts[4] == 0 && umid_parts[5] == 0 && umid_parts[6] == 0 && umid_parts[7] == 0) {
                 /* basic UMID */
-                snprintf(temp, sizeof(temp), "0x%016lX%016lX%016lX%016lX",
+                snprintf(temp, sizeof(temp), "0x%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64,
                          umid_parts[0], umid_parts[1], umid_parts[2], umid_parts[3]);
             } else {
                 /* extended UMID */
-                snprintf(temp, sizeof(temp), "0x%016lX%016lX%016lX%016lX%016lX%016lX%016lX%016lX",
+                snprintf(temp, sizeof(temp), "0x%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64
+                                             "0x%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64,
                          umid_parts[0], umid_parts[1], umid_parts[2], umid_parts[3],
                          umid_parts[4], umid_parts[5], umid_parts[6], umid_parts[7]);
             }
@@ -320,7 +321,7 @@ static int wav_read_header(AVFormatContext *s,
         sample_count = avio_rl64(pb);
         if (data_size < 0 || sample_count < 0) {
             av_log(s, AV_LOG_ERROR, "negative data_size and/or sample_count in "
-                   "ds64: data_size = %li, sample_count = %li\n",
+                   "ds64: data_size = %"PRId64", sample_count = %"PRId64"\n",
                    data_size, sample_count);
             return AVERROR_INVALIDDATA;
         }

From f7053dc41a29fdd2592f57ced97420ac917d3ca9 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 3 Apr 2011 16:48:33 +0200
Subject: [PATCH 231/830] vsrc_buffer: tweak error message in init()

Change:
Expected 7 arguments, but only %d found in '%s'\n
to:
Expected 7 arguments, but %d found in '%s'\n

as the user may provide more than 7 arguments; with the new wording
the error message is not misleading in that case.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavfilter/vsrc_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/vsrc_buffer.c b/libavfilter/vsrc_buffer.c
index 6567279667..1f0233e3e3 100644
--- a/libavfilter/vsrc_buffer.c
+++ b/libavfilter/vsrc_buffer.c
@@ -73,7 +73,7 @@ static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
         (n = sscanf(args, "%d:%d:%127[^:]:%d:%d:%d:%d", &c->w, &c->h, pix_fmt_str,
                     &c->time_base.num, &c->time_base.den,
                     &c->pixel_aspect.num, &c->pixel_aspect.den)) != 7) {
-        av_log(ctx, AV_LOG_ERROR, "Expected 7 arguments, but only %d found in '%s'\n", n, args);
+        av_log(ctx, AV_LOG_ERROR, "Expected 7 arguments, but %d found in '%s'\n", n, args);
         return AVERROR(EINVAL);
     }
     if ((c->pix_fmt = av_get_pix_fmt(pix_fmt_str)) == PIX_FMT_NONE) {

From 75abcdb3915e3abb2dc6b5f7d101c177dcfdb626 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 11 Apr 2011 11:29:35 +0200
Subject: [PATCH 232/830] vsrc_buffer.h: add file doxy

Signed-off-by: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavfilter/vsrc_buffer.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavfilter/vsrc_buffer.h b/libavfilter/vsrc_buffer.h
index 6867f81e1c..cfaf7919ac 100644
--- a/libavfilter/vsrc_buffer.h
+++ b/libavfilter/vsrc_buffer.h
@@ -22,6 +22,11 @@
 #ifndef AVFILTER_VSRC_BUFFER_H
 #define AVFILTER_VSRC_BUFFER_H
 
+/**
+ * @file
+ * memory buffer source API for video
+ */
+
 #include "libavcodec/avcodec.h" /* AVFrame */
 #include "avfilter.h"
 

From e66149e714006d099d1ebfcca3f22ca74fc7dcf4 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 10:03:26 -0400
Subject: [PATCH 233/830] swscale: force --enable-runtime-cpudetect and remove
 SWS_CPU_CAPS_*.

---
 libswscale/bfin/swscale_bfin.c    |  20 ++---
 libswscale/colorspace-test.c      |  29 +------
 libswscale/options.c              |   6 --
 libswscale/ppc/yuv2rgb_altivec.c  |   3 +-
 libswscale/rgb2rgb.c              |   7 +-
 libswscale/rgb2rgb.h              |   4 +-
 libswscale/rgb2rgb_template.c     |  19 -----
 libswscale/swscale.c              | 134 ++++++------------------------
 libswscale/swscale.h              |   7 --
 libswscale/swscale_internal.h     |   5 --
 libswscale/utils.c                | 120 ++++++++++++--------------
 libswscale/x86/rgb2rgb.c          |  21 ++---
 libswscale/x86/swscale_template.c |   7 +-
 libswscale/x86/yuv2rgb_mmx.c      |  70 ++++++++--------
 libswscale/x86/yuv2rgb_template.c |  31 +++----
 libswscale/yuv2rgb.c              |  28 +++----
 16 files changed, 171 insertions(+), 340 deletions(-)

diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c
index fa3c03b4ea..60b7f8310b 100644
--- a/libswscale/bfin/swscale_bfin.c
+++ b/libswscale/bfin/swscale_bfin.c
@@ -79,15 +79,13 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i
 void ff_bfin_get_unscaled_swscale(SwsContext *c)
 {
     SwsFunc swScale = c->swScale;
-    if (c->flags & SWS_CPU_CAPS_BFIN)
-        if (c->dstFormat == PIX_FMT_YUV420P)
-            if (c->srcFormat == PIX_FMT_UYVY422) {
-                av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n");
-                c->swScale = uyvytoyv12_unscaled;
-            }
-        if (c->dstFormat == PIX_FMT_YUV420P)
-            if (c->srcFormat == PIX_FMT_YUYV422) {
-                av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n");
-                c->swScale = yuyvtoyv12_unscaled;
-            }
+
+    if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_UYVY422) {
+        av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n");
+        c->swScale = uyvytoyv12_unscaled;
+    }
+    if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_YUYV422) {
+        av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n");
+        c->swScale = yuyvtoyv12_unscaled;
+    }
 }
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index 4e7116f5dc..4ed9164323 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -33,31 +33,6 @@
 
 #define FUNC(s,d,n) {s,d,#n,n}
 
-static int cpu_caps;
-
-static char *args_parse(int argc, char *argv[])
-{
-    int o;
-
-    while ((o = getopt(argc, argv, "m23")) != -1) {
-        switch (o) {
-        case 'm':
-            cpu_caps |= SWS_CPU_CAPS_MMX;
-            break;
-        case '2':
-            cpu_caps |= SWS_CPU_CAPS_MMX2;
-            break;
-        case '3':
-            cpu_caps |= SWS_CPU_CAPS_3DNOW;
-            break;
-        default:
-            av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o);
-        }
-    }
-
-    return argv[optind];
-}
-
 int main(int argc, char **argv)
 {
     int i, funcNum;
@@ -70,9 +45,7 @@ int main(int argc, char **argv)
         return -1;
 
     av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
-    args_parse(argc, argv);
-    av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps);
-    sws_rgb2rgb_init(cpu_caps);
+    sws_rgb2rgb_init();
 
     for(funcNum=0; ; funcNum++) {
         struct func_info_s {
diff --git a/libswscale/options.c b/libswscale/options.c
index f80735b261..ecd0ecd53a 100644
--- a/libswscale/options.c
+++ b/libswscale/options.c
@@ -48,12 +48,6 @@ static const AVOption options[] = {
     { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_SPLINE }, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "print_info", "print info", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_PRINT_INFO }, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_ACCURATE_RND }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_MMX }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_MMX2 }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_SSE2 }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_3DNOW }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_ALTIVEC }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_BFIN }, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX, VE, "sws_flags" },
     { "bitexact", "", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_BITEXACT }, INT_MIN, INT_MAX, VE, "sws_flags" },
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 0113c8ddd5..96c208a074 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -94,6 +94,7 @@ adjustment.
 #include "libswscale/rgb2rgb.h"
 #include "libswscale/swscale.h"
 #include "libswscale/swscale_internal.h"
+#include "libavutil/cpu.h"
 
 #undef PROFILE_THE_BEAST
 #undef INC_SCALING
@@ -692,7 +693,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c,
 */
 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
 {
-    if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
+    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
         return NULL;
 
     /*
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 74cc42e3b0..e18cd51011 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -116,12 +116,11 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t
  32-bit C version, and and&add trick by Michael Niedermayer
 */
 
-void sws_rgb2rgb_init(int flags)
+void sws_rgb2rgb_init(void)
 {
     rgb2rgb_init_c();
-#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
-    rgb2rgb_init_x86(flags);
-#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
+    if (HAVE_MMX)
+        rgb2rgb_init_x86();
 }
 
 void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 158d9a9a87..a8d5531cb8 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -156,8 +156,8 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u
                             long width, long height,
                             long lumStride, long chromStride, long srcStride);
 
-void sws_rgb2rgb_init(int flags);
+void sws_rgb2rgb_init(void);
 
-void rgb2rgb_init_x86(int flags);
+void rgb2rgb_init_x86(void);
 
 #endif /* SWSCALE_RGB2RGB_H */
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 7537419d83..ea39be4f13 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -278,25 +278,6 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-/*
- * mm0 = 00 B3 00 B2 00 B1 00 B0
- * mm1 = 00 G3 00 G2 00 G1 00 G0
- * mm2 = 00 R3 00 R2 00 R1 00 R0
- * mm6 = FF FF FF FF FF FF FF FF
- * mm7 = 00 00 00 00 00 00 00 00
- */
-#define PACK_RGB32 \
-    "packuswb   %%mm7, %%mm0    \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
-    "packuswb   %%mm7, %%mm1    \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
-    "packuswb   %%mm7, %%mm2    \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
-    "punpcklbw  %%mm1, %%mm0    \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
-    "punpcklbw  %%mm6, %%mm2    \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
-    "movq       %%mm0, %%mm3    \n\t"                               \
-    "punpcklwd  %%mm2, %%mm0    \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
-    "punpckhwd  %%mm2, %%mm3    \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
-    MOVNTQ"     %%mm0,  %0      \n\t"                               \
-    MOVNTQ"     %%mm3, 8%0      \n\t"                               \
-
 static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint16_t *end;
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 3943aa0b72..749a276e5a 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -62,6 +62,7 @@ untested special converters
 #include "rgb2rgb.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
 #include "libavutil/avutil.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/bswap.h"
@@ -70,10 +71,6 @@ untested special converters
 #undef MOVNTQ
 #undef PAVGB
 
-//#undef HAVE_MMX2
-//#define HAVE_AMD3DNOW
-//#undef HAVE_MMX
-//#undef ARCH_X86
 #define DITHER1XBPP
 
 #define isPacked(x)         (       \
@@ -1179,57 +1176,14 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 
 //Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
 //Plain C versions
-#if CONFIG_RUNTIME_CPUDETECT
-#  define COMPILE_C 1
-#  if   ARCH_X86
-#    define COMPILE_MMX     HAVE_MMX
-#    define COMPILE_MMX2    HAVE_MMX2
-#    define COMPILE_3DNOW   HAVE_AMD3DNOW
-#  elif ARCH_PPC
-#    define COMPILE_ALTIVEC HAVE_ALTIVEC
-#  endif
-#else /* CONFIG_RUNTIME_CPUDETECT */
-#  if   ARCH_X86
-#    if   HAVE_MMX2
-#      define COMPILE_MMX2  1
-#    elif HAVE_AMD3DNOW
-#      define COMPILE_3DNOW 1
-#    elif HAVE_MMX
-#      define COMPILE_MMX   1
-#    else
-#      define COMPILE_C     1
-#    endif
-#  elif ARCH_PPC && HAVE_ALTIVEC
-#    define COMPILE_ALTIVEC 1
-#  else
-#    define COMPILE_C       1
-#  endif
-#endif
 
-#ifndef COMPILE_C
-#  define COMPILE_C 0
-#endif
-#ifndef COMPILE_MMX
-#  define COMPILE_MMX 0
-#endif
-#ifndef COMPILE_MMX2
-#  define COMPILE_MMX2 0
-#endif
-#ifndef COMPILE_3DNOW
-#  define COMPILE_3DNOW 0
-#endif
-#ifndef COMPILE_ALTIVEC
-#  define COMPILE_ALTIVEC 0
-#endif
-
-#define COMPILE_TEMPLATE_MMX 0
 #define COMPILE_TEMPLATE_MMX2 0
 #define COMPILE_TEMPLATE_AMD3DNOW 0
 #define COMPILE_TEMPLATE_ALTIVEC 0
 
 #include "swscale_template.c"
 
-#if COMPILE_ALTIVEC
+#if HAVE_ALTIVEC
 #undef RENAME
 #undef COMPILE_TEMPLATE_ALTIVEC
 #define COMPILE_TEMPLATE_ALTIVEC 1
@@ -1237,15 +1191,11 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 #include "ppc/swscale_template.c"
 #endif
 
-#if ARCH_X86
-
 //MMX versions
-#if COMPILE_MMX
+#if HAVE_MMX
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX
 #undef COMPILE_TEMPLATE_MMX2
 #undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMX 1
 #define COMPILE_TEMPLATE_MMX2 0
 #define COMPILE_TEMPLATE_AMD3DNOW 0
 #define RENAME(a) a ## _MMX
@@ -1253,12 +1203,10 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 #endif
 
 //MMX2 versions
-#if COMPILE_MMX2
+#if HAVE_MMX2
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX
 #undef COMPILE_TEMPLATE_MMX2
 #undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMX 1
 #define COMPILE_TEMPLATE_MMX2 1
 #define COMPILE_TEMPLATE_AMD3DNOW 0
 #define RENAME(a) a ## _MMX2
@@ -1266,61 +1214,47 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 #endif
 
 //3DNOW versions
-#if COMPILE_3DNOW
+#if HAVE_AMD3DNOW
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX
 #undef COMPILE_TEMPLATE_MMX2
 #undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMX 1
 #define COMPILE_TEMPLATE_MMX2 0
 #define COMPILE_TEMPLATE_AMD3DNOW 1
 #define RENAME(a) a ## _3DNow
 #include "x86/swscale_template.c"
 #endif
 
-#endif //ARCH_X86
-
 SwsFunc ff_getSwsFunc(SwsContext *c)
 {
+    int cpu_flags = av_get_cpu_flags();
+
     sws_init_swScale_c(c);
 
-#if CONFIG_RUNTIME_CPUDETECT
-#if ARCH_X86
+#if HAVE_MMX2
     // ordered per speed fastest first
-    if (c->flags & SWS_CPU_CAPS_MMX2) {
+    if (cpu_flags & AV_CPU_FLAG_MMX2) {
         sws_init_swScale_MMX2(c);
         return swScale_MMX2;
-    } else if (c->flags & SWS_CPU_CAPS_3DNOW) {
+    } else
+#endif
+#if HAVE_AMD3DNOW
+    if (cpu_flags & AV_CPU_FLAG_3DNOW) {
         sws_init_swScale_3DNow(c);
         return swScale_3DNow;
-    } else if (c->flags & SWS_CPU_CAPS_MMX) {
+    } else
+#endif
+#if HAVE_MMX
+    if (cpu_flags & AV_CPU_FLAG_MMX) {
         sws_init_swScale_MMX(c);
         return swScale_MMX;
-    }
-
-#else
-#if COMPILE_ALTIVEC
-    if (c->flags & SWS_CPU_CAPS_ALTIVEC) {
+    } else
+#endif
+#if HAVE_ALTIVEC
+    if (cpu_flags & AV_CPU_FLAG_ALTIVEC) {
         sws_init_swScale_altivec(c);
         return swScale_altivec;
-    }
+    } else
 #endif
-#endif /* ARCH_X86 */
-#else //CONFIG_RUNTIME_CPUDETECT
-#if   COMPILE_TEMPLATE_MMX2
-    sws_init_swScale_MMX2(c);
-    return swScale_MMX2;
-#elif COMPILE_TEMPLATE_AMD3DNOW
-    sws_init_swScale_3DNow(c);
-    return swScale_3DNow;
-#elif COMPILE_TEMPLATE_MMX
-    sws_init_swScale_MMX(c);
-    return swScale_MMX;
-#elif COMPILE_TEMPLATE_ALTIVEC
-    sws_init_swScale_altivec(c);
-    return swScale_altivec;
-#endif
-#endif //!CONFIG_RUNTIME_CPUDETECT
 
     return swScale_c;
 }
@@ -1864,23 +1798,6 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
     return srcSliceH;
 }
 
-int ff_hardcodedcpuflags(void)
-{
-    int flags = 0;
-#if   COMPILE_TEMPLATE_MMX2
-    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
-#elif COMPILE_TEMPLATE_AMD3DNOW
-    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
-#elif COMPILE_TEMPLATE_MMX
-    flags |= SWS_CPU_CAPS_MMX;
-#elif COMPILE_TEMPLATE_ALTIVEC
-    flags |= SWS_CPU_CAPS_ALTIVEC;
-#elif ARCH_BFIN
-    flags |= SWS_CPU_CAPS_BFIN;
-#endif
-    return flags;
-}
-
 void ff_get_unscaled_swscale(SwsContext *c)
 {
     const enum PixelFormat srcFormat = c->srcFormat;
@@ -1964,8 +1881,8 @@ void ff_get_unscaled_swscale(SwsContext *c)
     if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
         c->swScale= uyvyToYuv422Wrapper;
 
-#if COMPILE_ALTIVEC
-    if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
+#if HAVE_ALTIVEC
+    if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) &&
         !(c->flags & SWS_BITEXACT) &&
         srcFormat == PIX_FMT_YUV420P) {
         // unscaled YV12 -> packed YUV, we want speed
@@ -1995,8 +1912,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
             c->swScale= planarCopyWrapper;
     }
 #if ARCH_BFIN
-    if (flags & SWS_CPU_CAPS_BFIN)
-        ff_bfin_get_unscaled_swscale (c);
+    ff_bfin_get_unscaled_swscale (c);
 #endif
 }
 
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index dd4de76b0a..1babced737 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -95,13 +95,6 @@ const char *swscale_license(void);
 #define SWS_ACCURATE_RND      0x40000
 #define SWS_BITEXACT          0x80000
 
-#define SWS_CPU_CAPS_MMX      0x80000000
-#define SWS_CPU_CAPS_MMX2     0x20000000
-#define SWS_CPU_CAPS_3DNOW    0x40000000
-#define SWS_CPU_CAPS_ALTIVEC  0x10000000
-#define SWS_CPU_CAPS_BFIN     0x01000000
-#define SWS_CPU_CAPS_SSE2     0x02000000
-
 #define SWS_MAX_REDUCE_CUTOFF 0.002
 
 #define SWS_CS_ITU709         1
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 2369546cb7..5f2ff94691 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -475,11 +475,6 @@ extern const AVClass sws_context_class;
  */
 void ff_get_unscaled_swscale(SwsContext *c);
 
-/**
- * Returns the SWS_CPU_CAPS for the optimized code compiled into swscale.
- */
-int ff_hardcodedcpuflags(void);
-
 /**
  * Returns function pointer to fastest main scaler path function depending
  * on architecture and available optimizations.
diff --git a/libswscale/utils.c b/libswscale/utils.c
index e9319fa67b..b49ec89be8 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -39,6 +39,7 @@
 #include "rgb2rgb.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
 #include "libavutil/avutil.h"
 #include "libavutil/bswap.h"
 #include "libavutil/opt.h"
@@ -180,7 +181,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist
 }
 
 static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
-                      int srcW, int dstW, int filterAlign, int one, int flags,
+                      int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags,
                       SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
 {
     int i;
@@ -191,10 +192,9 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
     int64_t *filter2=NULL;
     const int64_t fone= 1LL<<54;
     int ret= -1;
-#if ARCH_X86
-    if (flags & SWS_CPU_CAPS_MMX)
+
+    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX)
         __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
-#endif
 
     // NOTE: the +1 is for the MMX scaler which reads over the end
     FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+1)*sizeof(int16_t), fail);
@@ -411,7 +411,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
         if (min>minFilterSize) minFilterSize= min;
     }
 
-    if (flags & SWS_CPU_CAPS_ALTIVEC) {
+    if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) {
         // we can handle the special case 4,
         // so we don't want to go to the full 8
         if (minFilterSize < 5)
@@ -426,7 +426,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
             filterAlign = 1;
     }
 
-    if (flags & SWS_CPU_CAPS_MMX) {
+    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
         // special case for unscaled vertical filtering
         if (minFilterSize == 1 && filterAlign == 2)
             filterAlign= 1;
@@ -516,7 +516,7 @@ fail:
     return ret;
 }
 
-#if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
+#if HAVE_MMX2
 static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *filter, int32_t *filterPos, int numSplits)
 {
     uint8_t *fragmentA;
@@ -674,7 +674,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
 
     return fragmentPos + 1;
 }
-#endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
+#endif /* HAVE_MMX2 */
 
 static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
 {
@@ -682,8 +682,6 @@ static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
     *v = av_pix_fmt_descriptors[format].log2_chroma_h;
 }
 
-static int update_flags_cpu(int flags);
-
 int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation)
 {
     memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
@@ -698,15 +696,12 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange
 
     c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->dstFormat]);
     c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->srcFormat]);
-    c->flags = update_flags_cpu(c->flags);
 
     ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
     //FIXME factorize
 
-#if HAVE_ALTIVEC
-    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
+    if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)
         ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation);
-#endif
     return 0;
 }
 
@@ -736,20 +731,6 @@ static int handle_jpeg(enum PixelFormat *format)
     }
 }
 
-static int update_flags_cpu(int flags)
-{
-#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
-    flags &= ~( SWS_CPU_CAPS_MMX
-               |SWS_CPU_CAPS_MMX2
-               |SWS_CPU_CAPS_3DNOW
-               |SWS_CPU_CAPS_SSE2
-               |SWS_CPU_CAPS_ALTIVEC
-               |SWS_CPU_CAPS_BFIN);
-    flags |= ff_hardcodedcpuflags();
-#endif /* CONFIG_RUNTIME_CPUDETECT */
-    return flags;
-}
-
 SwsContext *sws_alloc_context(void)
 {
     SwsContext *c= av_mallocz(sizeof(SwsContext));
@@ -770,16 +751,15 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     int srcH= c->srcH;
     int dstW= c->dstW;
     int dstH= c->dstH;
-    int flags;
+    int flags, cpu_flags;
     enum PixelFormat srcFormat= c->srcFormat;
     enum PixelFormat dstFormat= c->dstFormat;
 
-    flags= c->flags = update_flags_cpu(c->flags);
-#if ARCH_X86
-    if (flags & SWS_CPU_CAPS_MMX)
+    cpu_flags = av_get_cpu_flags();
+    flags     = c->flags;
+    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX)
         __asm__ volatile("emms\n\t"::: "memory");
-#endif
-    if (!rgb15to16) sws_rgb2rgb_init(flags);
+    if (!rgb15to16) sws_rgb2rgb_init();
 
     unscaled = (srcW == dstW && srcH == dstH);
 
@@ -872,7 +852,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         }
     }
 
-    if (flags & SWS_CPU_CAPS_MMX2) {
+    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
         c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
         if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
             if (flags&SWS_PRINT_INFO)
@@ -898,7 +878,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
             c->chrXInc+= 20;
         }
         //we don't use the x86 asm scaler if MMX is available
-        else if (flags & SWS_CPU_CAPS_MMX) {
+        else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
             c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
             c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
         }
@@ -906,7 +886,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 
     /* precalculate horizontal scaler filter coefficients */
     {
-#if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
+#if HAVE_MMX2
 // can't downscale !!!
         if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
             c->lumMmx2FilterCodeSize = initMMX2HScaler(      dstW, c->lumXInc, NULL, NULL, NULL, 8);
@@ -938,21 +918,21 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
             mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
 #endif
         } else
-#endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
+#endif /* HAVE_MMX2 */
         {
             const int filterAlign=
-                (flags & SWS_CPU_CAPS_MMX) ? 4 :
-                (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+                (HAVE_MMX     && cpu_flags & AV_CPU_FLAG_MMX) ? 4 :
+                (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 :
                 1;
 
             if (initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
                            srcW      ,       dstW, filterAlign, 1<<14,
-                           (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
+                           (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags, cpu_flags,
                            srcFilter->lumH, dstFilter->lumH, c->param) < 0)
                 goto fail;
             if (initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
                            c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
-                           (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+                           (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags,
                            srcFilter->chrH, dstFilter->chrH, c->param) < 0)
                 goto fail;
         }
@@ -961,18 +941,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     /* precalculate vertical scaler filter coefficients */
     {
         const int filterAlign=
-            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
-            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+            (HAVE_MMX     && cpu_flags & AV_CPU_FLAG_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
+            (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 :
             1;
 
         if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
                        srcH      ,        dstH, filterAlign, (1<<12),
-                       (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
+                       (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags, cpu_flags,
                        srcFilter->lumV, dstFilter->lumV, c->param) < 0)
             goto fail;
         if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
                        c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
-                       (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+                       (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags,
                        srcFilter->chrV, dstFilter->chrV, c->param) < 0)
             goto fail;
 
@@ -1066,13 +1046,13 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 #endif
                sws_format_name(dstFormat));
 
-        if      (flags & SWS_CPU_CAPS_MMX2)    av_log(c, AV_LOG_INFO, "using MMX2\n");
-        else if (flags & SWS_CPU_CAPS_3DNOW)   av_log(c, AV_LOG_INFO, "using 3DNOW\n");
-        else if (flags & SWS_CPU_CAPS_MMX)     av_log(c, AV_LOG_INFO, "using MMX\n");
-        else if (flags & SWS_CPU_CAPS_ALTIVEC) av_log(c, AV_LOG_INFO, "using AltiVec\n");
+        if      (HAVE_MMX2     && cpu_flags & AV_CPU_FLAG_MMX2)    av_log(c, AV_LOG_INFO, "using MMX2\n");
+        else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)   av_log(c, AV_LOG_INFO, "using 3DNOW\n");
+        else if (HAVE_MMX      && cpu_flags & AV_CPU_FLAG_MMX)     av_log(c, AV_LOG_INFO, "using MMX\n");
+        else if (HAVE_ALTIVEC  && cpu_flags & AV_CPU_FLAG_ALTIVEC) av_log(c, AV_LOG_INFO, "using AltiVec\n");
         else                                   av_log(c, AV_LOG_INFO, "using C\n");
 
-        if (flags & SWS_CPU_CAPS_MMX) {
+        if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
             if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
                 av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
             else {
@@ -1091,7 +1071,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
                     av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n");
             }
         } else {
-#if ARCH_X86
+#if HAVE_MMX
             av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
 #else
             if (flags & SWS_FAST_BILINEAR)
@@ -1102,31 +1082,41 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         }
         if (isPlanarYUV(dstFormat)) {
             if (c->vLumFilterSize==1)
-                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n",
+                       (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
             else
-                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n",
+                       (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         } else {
             if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
                 av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
-                       "      2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                       "      2-tap scaler for vertical chrominance scaling (BGR)\n",
+                       (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
             else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
-                av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n",
+                       (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
             else
-                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n",
+                       (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         }
 
         if (dstFormat==PIX_FMT_BGR24)
             av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
-                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
+                   (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) ? "MMX2" :
+                   ((HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"));
         else if (dstFormat==PIX_FMT_RGB32)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n",
+                   (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         else if (dstFormat==PIX_FMT_BGR565)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n",
+                   (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         else if (dstFormat==PIX_FMT_BGR555)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n",
+                   (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         else if (dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE ||
                  dstFormat == PIX_FMT_BGR444BE || dstFormat == PIX_FMT_BGR444LE)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n",
+                   (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
 
         av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
         av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
@@ -1504,7 +1494,7 @@ void sws_freeContext(SwsContext *c)
     av_freep(&c->hLumFilterPos);
     av_freep(&c->hChrFilterPos);
 
-#if ARCH_X86
+#if HAVE_MMX
 #ifdef MAP_ANONYMOUS
     if (c->lumMmx2FilterCode) munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize);
     if (c->chrMmx2FilterCode) munmap(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize);
@@ -1517,7 +1507,7 @@ void sws_freeContext(SwsContext *c)
 #endif
     c->lumMmx2FilterCode=NULL;
     c->chrMmx2FilterCode=NULL;
-#endif /* ARCH_X86 */
+#endif /* HAVE_MMX */
 
     av_freep(&c->yuvTable);
 
@@ -1534,8 +1524,6 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context,
     if (!param)
         param = default_param;
 
-    flags = update_flags_cpu(flags);
-
     if (context &&
         (context->srcW      != srcW      ||
          context->srcH      != srcH      ||
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index e84bc1bcc9..cf901affe6 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -27,6 +27,7 @@
 
 #include "config.h"
 #include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
 #include "libavutil/bswap.h"
 #include "libswscale/rgb2rgb.h"
 #include "libswscale/swscale.h"
@@ -122,16 +123,16 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
  32-bit C version, and and&add trick by Michael Niedermayer
 */
 
-void rgb2rgb_init_x86(int flags)
+void rgb2rgb_init_x86(void)
 {
-#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
-    if (flags & SWS_CPU_CAPS_SSE2)
-        rgb2rgb_init_SSE2();
-    else if (flags & SWS_CPU_CAPS_MMX2)
-        rgb2rgb_init_MMX2();
-    else if (flags & SWS_CPU_CAPS_3DNOW)
-        rgb2rgb_init_3DNOW();
-    else if (flags & SWS_CPU_CAPS_MMX)
+    int cpu_flags = av_get_cpu_flags();
+
+    if (HAVE_MMX      && cpu_flags & AV_CPU_FLAG_MMX)
         rgb2rgb_init_MMX();
-#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
+    if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)
+        rgb2rgb_init_3DNOW();
+    if (HAVE_MMX2     && cpu_flags & AV_CPU_FLAG_MMX2)
+        rgb2rgb_init_MMX2();
+    if (HAVE_SSE      && cpu_flags & AV_CPU_FLAG_SSE2)
+        rgb2rgb_init_SSE2();
 }
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index d719721693..e9e093780d 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2721,10 +2721,11 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
 
-    if (flags & SWS_CPU_CAPS_MMX2 )  __asm__ volatile("sfence":::"memory");
+    if (COMPILE_TEMPLATE_MMX2)      __asm__ volatile("sfence":::"memory");
     /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-    if (flags & SWS_CPU_CAPS_3DNOW)  __asm__ volatile("femms" :::"memory");
-    else                             __asm__ volatile("emms"  :::"memory");
+    if (COMPILE_TEMPLATE_AMD3DNOW)  __asm__ volatile("femms" :::"memory");
+    else                            __asm__ volatile("emms"  :::"memory");
+
     /* store changed local vars back in the context */
     c->dstY= dstY;
     c->lumBufIndex= lumBufIndex;
diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c
index ff3a93db36..23d4c42700 100644
--- a/libswscale/x86/yuv2rgb_mmx.c
+++ b/libswscale/x86/yuv2rgb_mmx.c
@@ -34,6 +34,7 @@
 #include "libswscale/swscale.h"
 #include "libswscale/swscale_internal.h"
 #include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
 
 #define DITHER1XBPP // only for MMX
 
@@ -46,57 +47,58 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
+#if HAVE_MMX
 #undef RENAME
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 0
+#undef COMPILE_TEMPLATE_MMX2
+#define COMPILE_TEMPLATE_MMX2 0
 #define RENAME(a) a ## _MMX
 #include "yuv2rgb_template.c"
+#endif /* HAVE_MMX */
 
 //MMX2 versions
+#if HAVE_MMX2
 #undef RENAME
-#undef HAVE_MMX2
-#define HAVE_MMX2 1
+#undef COMPILE_TEMPLATE_MMX2
+#define COMPILE_TEMPLATE_MMX2 1
 #define RENAME(a) a ## _MMX2
 #include "yuv2rgb_template.c"
+#endif /* HAVE_MMX2 */
 
 SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
 {
-    if (c->flags & SWS_CPU_CAPS_MMX2) {
+    int cpu_flags = av_get_cpu_flags();
+
+    if (c->srcFormat != PIX_FMT_YUV420P &&
+        c->srcFormat != PIX_FMT_YUVA420P)
+        return NULL;
+
+    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
         switch (c->dstFormat) {
-        case PIX_FMT_RGB32:
-            if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
-                if (HAVE_7REGS) return yuva420_rgb32_MMX2;
-                break;
-            } else return yuv420_rgb32_MMX2;
-        case PIX_FMT_BGR32:
-            if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
-                if (HAVE_7REGS) return yuva420_bgr32_MMX2;
-                break;
-            } else return yuv420_bgr32_MMX2;
         case PIX_FMT_RGB24:  return yuv420_rgb24_MMX2;
         case PIX_FMT_BGR24:  return yuv420_bgr24_MMX2;
-        case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
-        case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
         }
     }
-    if (c->flags & SWS_CPU_CAPS_MMX) {
+
+    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
         switch (c->dstFormat) {
-        case PIX_FMT_RGB32:
-            if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
-                if (HAVE_7REGS) return yuva420_rgb32_MMX;
-                break;
-            } else return yuv420_rgb32_MMX;
-        case PIX_FMT_BGR32:
-            if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
-                if (HAVE_7REGS) return yuva420_bgr32_MMX;
-                break;
-            } else return yuv420_bgr32_MMX;
-        case PIX_FMT_RGB24:  return yuv420_rgb24_MMX;
-        case PIX_FMT_BGR24:  return yuv420_bgr24_MMX;
-        case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
-        case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
+            case PIX_FMT_RGB32:
+                if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
+#if HAVE_7REGS
+                    return yuva420_rgb32_MMX;
+#endif
+                    break;
+                } else return yuv420_rgb32_MMX;
+            case PIX_FMT_BGR32:
+                if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
+#if HAVE_7REGS
+                    return yuva420_bgr32_MMX;
+#endif
+                    break;
+                } else return yuv420_bgr32_MMX;
+            case PIX_FMT_RGB24:  return yuv420_rgb24_MMX;
+            case PIX_FMT_BGR24:  return yuv420_bgr24_MMX;
+            case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
+            case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
         }
     }
 
diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c
index cf8f0d3cfb..5d1fa5b309 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -25,14 +25,7 @@
 #undef EMMS
 #undef SFENCE
 
-#if HAVE_AMD3DNOW
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-#define EMMS   "femms"
-#else
-#define EMMS   "emms"
-#endif
-
-#if HAVE_MMX2
+#if COMPILE_TEMPLATE_MMX2
 #define MOVNTQ "movntq"
 #define SFENCE "sfence"
 #else
@@ -159,7 +152,8 @@
     }                                                             \
 
 #define YUV2RGB_ENDFUNC                          \
-    __asm__ volatile (SFENCE"\n\t"EMMS);         \
+    __asm__ volatile (SFENCE"\n\t"               \
+                    "emms    \n\t");             \
     return srcSliceH;                            \
 
 #define IF0(x)
@@ -188,6 +182,7 @@
     "paddusb "GREEN_DITHER"(%4), %%mm2\n\t"      \
     "paddusb "RED_DITHER"(%4),   %%mm1\n\t"      \
 
+#if !COMPILE_TEMPLATE_MMX2
 static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
                                        int srcStride[],
                                        int srcSliceY, int srcSliceH,
@@ -243,6 +238,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
     YUV2RGB_OPERANDS
     YUV2RGB_ENDFUNC
 }
+#endif /* !COMPILE_TEMPLATE_MMX2 */
 
 #define RGB_PACK24(blue, red)\
     "packuswb  %%mm3,      %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\
@@ -259,7 +255,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
     "punpckhwd %%mm6,      %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\
     RGB_PACK24_B
 
-#if HAVE_MMX2
+#if COMPILE_TEMPLATE_MMX2
 DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1};
 DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0};
 DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0};
@@ -366,6 +362,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
     MOVNTQ "   %%mm5,       16(%1)\n\t"      \
     MOVNTQ "   %%mm"alpha", 24(%1)\n\t"      \
 
+#if !COMPILE_TEMPLATE_MMX2
 static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
                                        int srcStride[],
                                        int srcSliceY, int srcSliceH,
@@ -386,12 +383,12 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
     YUV2RGB_ENDFUNC
 }
 
+#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
 static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
                                         int srcStride[],
                                         int srcSliceY, int srcSliceH,
                                         uint8_t *dst[], int dstStride[])
 {
-#if HAVE_7REGS
     int y, h_size;
 
     YUV2RGB_LOOP(4)
@@ -406,10 +403,8 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
     YUV2RGB_ENDLOOP(4)
     YUV2RGB_OPERANDS_ALPHA
     YUV2RGB_ENDFUNC
-#else
-    return 0;
-#endif
 }
+#endif
 
 static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
                                        int srcStride[],
@@ -431,12 +426,12 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
     YUV2RGB_ENDFUNC
 }
 
+#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
 static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
                                         int srcStride[],
                                         int srcSliceY, int srcSliceH,
                                         uint8_t *dst[], int dstStride[])
 {
-#if HAVE_7REGS
     int y, h_size;
 
     YUV2RGB_LOOP(4)
@@ -451,7 +446,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
     YUV2RGB_ENDLOOP(4)
     YUV2RGB_OPERANDS_ALPHA
     YUV2RGB_ENDFUNC
-#else
-    return 0;
-#endif
 }
+#endif
+
+#endif /* !COMPILE_TEMPLATE_MMX2 */
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 74262c6b7f..a502f654ed 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -32,7 +32,7 @@
 #include "rgb2rgb.h"
 #include "swscale.h"
 #include "swscale_internal.h"
-#include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
 #include "libavutil/bswap.h"
 
 extern const uint8_t dither_4x4_16[4][8];
@@ -579,24 +579,18 @@ CLOSEYUV2RGBFUNC(1)
 SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
 {
     SwsFunc t = NULL;
-#if HAVE_MMX
-     t = ff_yuv2rgb_init_mmx(c);
-#endif
-#if HAVE_VIS
-    t = ff_yuv2rgb_init_vis(c);
-#endif
-#if CONFIG_MLIB
-    t = ff_yuv2rgb_init_mlib(c);
-#endif
-#if HAVE_ALTIVEC
-    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
-        t = ff_yuv2rgb_init_altivec(c);
-#endif
 
-#if ARCH_BFIN
-    if (c->flags & SWS_CPU_CAPS_BFIN)
+    if (HAVE_MMX) {
+        t = ff_yuv2rgb_init_mmx(c);
+    } else if (HAVE_VIS) {
+        t = ff_yuv2rgb_init_vis(c);
+    } else if (CONFIG_MLIB) {
+        t = ff_yuv2rgb_init_mlib(c);
+    } else if (HAVE_ALTIVEC) {
+        t = ff_yuv2rgb_init_altivec(c);
+    } else if (ARCH_BFIN) {
         t = ff_yuv2rgb_get_func_ptr_bfin(c);
-#endif
+    }
 
     if (t)
         return t;

From 11ffefefdbf92016e764e148e41ec08f9bc1a2db Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 10:11:26 -0400
Subject: [PATCH 234/830] swscale: remove duplicated x86/ functions.

---
 libswscale/x86/swscale_template.c | 214 +++---------------------------
 1 file changed, 17 insertions(+), 197 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index e9e093780d..d0671315ae 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -925,19 +925,9 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
                 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
 }
 
-static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
-{
-    yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
-                 chrFilter, chrSrc, chrFilterSize,
-                 dest, uDest, dstW, chrDstW, dstFormat);
-}
-
 static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
 {
-    int i;
     if(!(c->flags & SWS_BITEXACT)) {
         long p= 4;
         const uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
@@ -969,38 +959,8 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
         }
         return;
     }
-    for (i=0; i<dstW; i++) {
-        int val= (lumSrc[i]+64)>>7;
-
-        if (val&256) {
-            if (val<0) val=0;
-            else       val=255;
-        }
-
-        dest[i]= val;
-    }
-
-    if (uDest)
-        for (i=0; i<chrDstW; i++) {
-            int u=(chrSrc[i       ]+64)>>7;
-            int v=(chrSrc[i + VOFW]+64)>>7;
-
-            if ((u|v)&256) {
-                if (u<0)        u=0;
-                else if (u>255) u=255;
-                if (v<0)        v=0;
-                else if (v>255) v=255;
-            }
-
-            uDest[i]= u;
-            vDest[i]= v;
-        }
-
-    if (CONFIG_SWSCALE_ALPHA && aDest)
-        for (i=0; i<dstW; i++) {
-            int val= (alpSrc[i]+64)>>7;
-            aDest[i]= av_clip_uint8(val);
-        }
+    yuv2yuv1_c(c, lumSrc, chrSrc, alpSrc, dest, uDest, vDest, aDest,
+               dstW, chrDstW);
 }
 
 
@@ -1182,10 +1142,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
 static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                           const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
 {
-    int  yalpha1=4095- yalpha;
-    int uvalpha1=4095-uvalpha;
-    int i;
-
     if(!(c->flags & SWS_BITEXACT)) {
         switch(c->dstFormat) {
         //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
@@ -1320,7 +1276,8 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
         default: break;
         }
     }
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
+    yuv2packed2_c(c, buf0, buf1, uvbuf0, uvbuf1, abuf0, abuf1,
+                  dest, dstW, yalpha, uvalpha, y);
 }
 
 /**
@@ -1329,18 +1286,14 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
 static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                           const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
 {
-    const int yalpha1=0;
-    int i;
-
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    const int yalpha= 4096; //FIXME ...
-
-    if (flags&SWS_FULL_CHR_H_INT) {
-        c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
-        return;
-    }
-
     if(!(flags & SWS_BITEXACT)) {
+        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+        if (flags&SWS_FULL_CHR_H_INT) {
+            c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
+            return;
+        }
+
         if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
             switch(dstFormat) {
             case PIX_FMT_RGB32:
@@ -1555,11 +1508,8 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
             }
         }
     }
-    if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
-    } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
-    }
+    yuv2packed1_c(c, buf0, uvbuf0, uvbuf1, abuf0, dest,
+                  dstW, uvalpha, dstFormat, flags, y);
 }
 
 //FIXME yuy2* can read up to 7 samples too much
@@ -1866,20 +1816,6 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        int b= src1[6*i + 0] + src1[6*i + 3];
-        int g= src1[6*i + 1] + src1[6*i + 4];
-        int r= src1[6*i + 2] + src1[6*i + 5];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-    }
-    assert(src1 == src2);
-}
-
 static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
 {
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
@@ -1891,20 +1827,6 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
 }
 
-static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i=0; i<width; i++) {
-        int r= src1[6*i + 0] + src1[6*i + 3];
-        int g= src1[6*i + 1] + src1[6*i + 4];
-        int b= src1[6*i + 2] + src1[6*i + 5];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-    }
-}
-
 
 // bilinear / bicubic scaling
 static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
@@ -2061,37 +1983,6 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
     }
 }
 
-//FIXME all pal and rgb srcFormats could do this convertion as well
-//FIXME all scalers more complex than bilinear could do half of this transform
-static void RENAME(chrRangeToJpeg)(uint16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        dst[i     ] = (FFMIN(dst[i     ],30775)*4663 - 9289992)>>12; //-264
-        dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
-    }
-}
-static void RENAME(chrRangeFromJpeg)(uint16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        dst[i     ] = (dst[i     ]*1799 + 4081085)>>11; //1469
-        dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
-    }
-}
-static void RENAME(lumRangeToJpeg)(uint16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++)
-        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
-}
-static void RENAME(lumRangeFromJpeg)(uint16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++)
-        dst[i] = (dst[i]*14071 + 33561947)>>14;
-}
-
 #define FAST_BILINEAR_X86 \
     "subl    %%edi, %%esi    \n\t" /*  src[xx+1] - src[xx] */                   \
     "imull   %%ecx, %%esi    \n\t" /* (src[xx+1] - src[xx])*xalpha */           \
@@ -2212,33 +2103,6 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
 #endif
 }
 
-      // *** horizontal scale Y line to temp buffer
-static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
-                                   const int16_t *hLumFilter,
-                                   const int16_t *hLumFilterPos, int hLumFilterSize,
-                                   uint8_t *formatConvBuffer,
-                                   uint32_t *pal, int isAlpha)
-{
-    void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
-    void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
-
-    src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
-
-    if (toYV12) {
-        toYV12(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-
-    if (!c->hyscale_fast) {
-        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
-    } else { // fast bilinear upscale / crap downscale
-        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
-    }
-
-    if (convertRange)
-        convertRange(dst, dstWidth);
-}
-
 static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
                                         long dstWidth, const uint8_t *src1,
                                         const uint8_t *src2, int srcW, int xInc)
@@ -2345,33 +2209,6 @@ which is needed to support GCC 4.0. */
 #endif
 }
 
-inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
-                                   int srcW, int xInc, const int16_t *hChrFilter,
-                                   const int16_t *hChrFilterPos, int hChrFilterSize,
-                                   uint8_t *formatConvBuffer,
-                                   uint32_t *pal)
-{
-
-    src1 += c->chrSrcOffset;
-    src2 += c->chrSrcOffset;
-
-    if (c->chrToYV12) {
-        c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-
-    if (!c->hcscale_fast) {
-        c->hScale(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-        c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-    } else { // fast bilinear upscale / crap downscale
-        c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
-    }
-
-    if (c->chrConvertRange)
-        c->chrConvertRange(dst, dstWidth);
-}
-
 #define DEBUG_SWSCALE_BUFFERS 0
 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
 
@@ -2509,12 +2346,12 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
             assert(lumBufIndex < 2*vLumBufSize);
             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
             assert(lastInLumBuf + 1 - srcSliceY >= 0);
-            RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
+            hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                             hLumFilter, hLumFilterPos, hLumFilterSize,
                             formatConvBuffer,
                             pal, 0);
             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
-                RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
+                hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
                                 hLumFilter, hLumFilterPos, hLumFilterSize,
                                 formatConvBuffer,
                                 pal, 1);
@@ -2532,7 +2369,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
             //FIXME replace parameters through context struct (some at least)
 
             if (c->needs_hcscale)
-                RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
+                hcscale_c(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
                                 hChrFilter, hChrFilterPos, hChrFilterSize,
                                 formatConvBuffer,
                                 pal);
@@ -2740,7 +2577,6 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
 {
     enum PixelFormat srcFormat = c->srcFormat;
 
-    c->yuv2nv12X    = RENAME(yuv2nv12X   );
     c->yuv2yuv1     = RENAME(yuv2yuv1    );
     c->yuv2yuvX     = RENAME(yuv2yuvX    );
     c->yuv2packed1  = RENAME(yuv2packed1 );
@@ -2772,13 +2608,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break;
         default: break;
     }
-    if (c->chrSrcHSubSample) {
-        switch(srcFormat) {
-        case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV_half); break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV_half); break;
-        default: break;
-        }
-    } else {
+    if (!c->chrSrcHSubSample) {
         switch(srcFormat) {
         case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV); break;
         case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV); break;
@@ -2808,14 +2638,4 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         default: break;
         }
     }
-
-    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
-        if (c->srcRange) {
-            c->lumConvertRange = RENAME(lumRangeFromJpeg);
-            c->chrConvertRange = RENAME(chrRangeFromJpeg);
-        } else {
-            c->lumConvertRange = RENAME(lumRangeToJpeg);
-            c->chrConvertRange = RENAME(chrRangeToJpeg);
-        }
-    }
 }

From fe43d5d71e1b272e1176232a34074c8c9bc0c61d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sat, 14 May 2011 14:45:27 -0400
Subject: [PATCH 235/830] swscale: remove duplicate code in ppc/ subdirectory.

---
 libswscale/ppc/swscale_template.c | 793 +-----------------------------
 libswscale/swscale.c              |   4 +-
 2 files changed, 3 insertions(+), 794 deletions(-)

diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
index 3a569f1b78..7968177b52 100644
--- a/libswscale/ppc/swscale_template.c
+++ b/libswscale/ppc/swscale_template.c
@@ -23,69 +23,16 @@
 #include "swscale_altivec_template.c"
 #endif
 
+#if COMPILE_TEMPLATE_ALTIVEC
 static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
 {
-#if COMPILE_TEMPLATE_ALTIVEC
     yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
                           chrFilter, chrSrc, chrFilterSize,
                           dest, uDest, vDest, dstW, chrDstW);
-#else //COMPILE_TEMPLATE_ALTIVEC
-    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
-                chrFilter, chrSrc, chrFilterSize,
-                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
-#endif //!COMPILE_TEMPLATE_ALTIVEC
 }
 
-static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
-{
-    yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
-                 chrFilter, chrSrc, chrFilterSize,
-                 dest, uDest, dstW, chrDstW, dstFormat);
-}
-
-static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
-{
-    int i;
-    for (i=0; i<dstW; i++) {
-        int val= (lumSrc[i]+64)>>7;
-
-        if (val&256) {
-            if (val<0) val=0;
-            else       val=255;
-        }
-
-        dest[i]= val;
-    }
-
-    if (uDest)
-        for (i=0; i<chrDstW; i++) {
-            int u=(chrSrc[i       ]+64)>>7;
-            int v=(chrSrc[i + VOFW]+64)>>7;
-
-            if ((u|v)&256) {
-                if (u<0)        u=0;
-                else if (u>255) u=255;
-                if (v<0)        v=0;
-                else if (v>255) v=255;
-            }
-
-            uDest[i]= u;
-            vDest[i]= v;
-        }
-
-    if (CONFIG_SWSCALE_ALPHA && aDest)
-        for (i=0; i<dstW; i++) {
-            int val= (alpSrc[i]+64)>>7;
-            aDest[i]= av_clip_uint8(val);
-        }
-}
-
-
 /**
  * vertical scale YV12 to RGB
  */
@@ -93,7 +40,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                        const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
 {
-#if COMPILE_TEMPLATE_ALTIVEC
     /* The following list of supported dstFormat values should
        match what's found in the body of ff_yuv2packedX_altivec() */
     if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
@@ -104,751 +50,16 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                                    chrFilter, chrSrc, chrFilterSize,
                                    dest, dstW, dstY);
     else
-#endif
         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
                        chrFilter, chrSrc, chrFilterSize,
                        alpSrc, dest, dstW, dstY);
 }
-
-/**
- * vertical bilinear scale YV12 to RGB
- */
-static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                          const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
-{
-    int  yalpha1=4095- yalpha;
-    int uvalpha1=4095-uvalpha;
-    int i;
-
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
-}
-
-/**
- * YV12 to RGB without scaling or interpolating
- */
-static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                          const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
-{
-    const int yalpha1=0;
-    int i;
-
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    const int yalpha= 4096; //FIXME ...
-
-    if (flags&SWS_FULL_CHR_H_INT) {
-        c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
-        return;
-    }
-
-    if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
-    } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
-    }
-}
-
-//FIXME yuy2* can read up to 7 samples too much
-
-static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++)
-        dst[i]= src[2*i];
-}
-
-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dstU[i]= src1[4*i + 1];
-        dstV[i]= src1[4*i + 3];
-    }
-    assert(src1 == src2);
-}
-
-static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dstU[i]= src1[2*i + 1];
-        dstV[i]= src2[2*i + 1];
-    }
-}
-
-/* This is almost identical to the previous, end exists only because
- * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++)
-        dst[i]= src[2*i+1];
-}
-
-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dstU[i]= src1[4*i + 0];
-        dstV[i]= src1[4*i + 2];
-    }
-    assert(src1 == src2);
-}
-
-static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dstU[i]= src1[2*i];
-        dstV[i]= src2[2*i];
-    }
-}
-
-static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
-                                    const uint8_t *src, long width)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        dst1[i] = src[2*i+0];
-        dst2[i] = src[2*i+1];
-    }
-}
-
-static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
-                                    const uint8_t *src1, const uint8_t *src2,
-                                    long width, uint32_t *unused)
-{
-    RENAME(nvXXtoUV)(dstU, dstV, src1, width);
-}
-
-static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
-                                    const uint8_t *src1, const uint8_t *src2,
-                                    long width, uint32_t *unused)
-{
-    RENAME(nvXXtoUV)(dstV, dstU, src1, width);
-}
-
-
-static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        int b= src[i*3+0];
-        int g= src[i*3+1];
-        int r= src[i*3+2];
-
-        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-    }
-}
-
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        int b= src1[3*i + 0];
-        int g= src1[3*i + 1];
-        int r= src1[3*i + 2];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-    }
-    assert(src1 == src2);
-}
-
-static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        int b= src1[6*i + 0] + src1[6*i + 3];
-        int g= src1[6*i + 1] + src1[6*i + 4];
-        int r= src1[6*i + 2] + src1[6*i + 5];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-    }
-    assert(src1 == src2);
-}
-
-static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        int r= src[i*3+0];
-        int g= src[i*3+1];
-        int b= src[i*3+2];
-
-        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-    }
-}
-
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i=0; i<width; i++) {
-        int r= src1[3*i + 0];
-        int g= src1[3*i + 1];
-        int b= src1[3*i + 2];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-    }
-}
-
-static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i=0; i<width; i++) {
-        int r= src1[6*i + 0] + src1[6*i + 3];
-        int g= src1[6*i + 1] + src1[6*i + 4];
-        int b= src1[6*i + 2] + src1[6*i + 5];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-    }
-}
-
-
-// bilinear / bicubic scaling
-static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
-                                  const int16_t *filter, const int16_t *filterPos, long filterSize)
-{
-#if COMPILE_TEMPLATE_ALTIVEC
-    hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
-#else
-    int i;
-    for (i=0; i<dstW; i++) {
-        int j;
-        int srcPos= filterPos[i];
-        int val=0;
-        //printf("filterPos: %d\n", filterPos[i]);
-        for (j=0; j<filterSize; j++) {
-            //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
-            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
-        }
-        //filter += hFilterSize;
-        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
-        //dst[i] = val>>7;
-    }
-#endif /* COMPILE_TEMPLATE_ALTIVEC */
-}
-
-//FIXME all pal and rgb srcFormats could do this convertion as well
-//FIXME all scalers more complex than bilinear could do half of this transform
-static void RENAME(chrRangeToJpeg)(uint16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        dst[i     ] = (FFMIN(dst[i     ],30775)*4663 - 9289992)>>12; //-264
-        dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
-    }
-}
-static void RENAME(chrRangeFromJpeg)(uint16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        dst[i     ] = (dst[i     ]*1799 + 4081085)>>11; //1469
-        dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
-    }
-}
-static void RENAME(lumRangeToJpeg)(uint16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++)
-        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
-}
-static void RENAME(lumRangeFromJpeg)(uint16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++)
-        dst[i] = (dst[i]*14071 + 33561947)>>14;
-}
-
-static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
-                                        long dstWidth, const uint8_t *src, int srcW,
-                                        int xInc)
-{
-    int i;
-    unsigned int xpos=0;
-    for (i=0;i<dstWidth;i++) {
-        register unsigned int xx=xpos>>16;
-        register unsigned int xalpha=(xpos&0xFFFF)>>9;
-        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
-        xpos+=xInc;
-    }
-}
-
-      // *** horizontal scale Y line to temp buffer
-static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
-                                   const int16_t *hLumFilter,
-                                   const int16_t *hLumFilterPos, int hLumFilterSize,
-                                   uint8_t *formatConvBuffer,
-                                   uint32_t *pal, int isAlpha)
-{
-    void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
-    void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
-
-    src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
-
-    if (toYV12) {
-        toYV12(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-
-    if (!c->hyscale_fast) {
-        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
-    } else { // fast bilinear upscale / crap downscale
-        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
-    }
-
-    if (convertRange)
-        convertRange(dst, dstWidth);
-}
-
-static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
-                                        long dstWidth, const uint8_t *src1,
-                                        const uint8_t *src2, int srcW, int xInc)
-{
-    int i;
-    unsigned int xpos=0;
-    for (i=0;i<dstWidth;i++) {
-        register unsigned int xx=xpos>>16;
-        register unsigned int xalpha=(xpos&0xFFFF)>>9;
-        dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
-        dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
-        /* slower
-        dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
-        dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
-        */
-        xpos+=xInc;
-    }
-}
-
-inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
-                                   int srcW, int xInc, const int16_t *hChrFilter,
-                                   const int16_t *hChrFilterPos, int hChrFilterSize,
-                                   uint8_t *formatConvBuffer,
-                                   uint32_t *pal)
-{
-
-    src1 += c->chrSrcOffset;
-    src2 += c->chrSrcOffset;
-
-    if (c->chrToYV12) {
-        c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-
-    if (!c->hcscale_fast) {
-        c->hScale(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-        c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-    } else { // fast bilinear upscale / crap downscale
-        c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
-    }
-
-    if (c->chrConvertRange)
-        c->chrConvertRange(dst, dstWidth);
-}
-
-#define DEBUG_SWSCALE_BUFFERS 0
-#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
-
-static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                           int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    /* load a few things into local vars to make the code more readable? and faster */
-    const int srcW= c->srcW;
-    const int dstW= c->dstW;
-    const int dstH= c->dstH;
-    const int chrDstW= c->chrDstW;
-    const int chrSrcW= c->chrSrcW;
-    const int lumXInc= c->lumXInc;
-    const int chrXInc= c->chrXInc;
-    const enum PixelFormat dstFormat= c->dstFormat;
-    const int flags= c->flags;
-    int16_t *vLumFilterPos= c->vLumFilterPos;
-    int16_t *vChrFilterPos= c->vChrFilterPos;
-    int16_t *hLumFilterPos= c->hLumFilterPos;
-    int16_t *hChrFilterPos= c->hChrFilterPos;
-    int16_t *vLumFilter= c->vLumFilter;
-    int16_t *vChrFilter= c->vChrFilter;
-    int16_t *hLumFilter= c->hLumFilter;
-    int16_t *hChrFilter= c->hChrFilter;
-    int32_t *lumMmxFilter= c->lumMmxFilter;
-    int32_t *chrMmxFilter= c->chrMmxFilter;
-    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
-    const int vLumFilterSize= c->vLumFilterSize;
-    const int vChrFilterSize= c->vChrFilterSize;
-    const int hLumFilterSize= c->hLumFilterSize;
-    const int hChrFilterSize= c->hChrFilterSize;
-    int16_t **lumPixBuf= c->lumPixBuf;
-    int16_t **chrPixBuf= c->chrPixBuf;
-    int16_t **alpPixBuf= c->alpPixBuf;
-    const int vLumBufSize= c->vLumBufSize;
-    const int vChrBufSize= c->vChrBufSize;
-    uint8_t *formatConvBuffer= c->formatConvBuffer;
-    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
-    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
-    int lastDstY;
-    uint32_t *pal=c->pal_yuv;
-
-    /* vars which will change and which we need to store back in the context */
-    int dstY= c->dstY;
-    int lumBufIndex= c->lumBufIndex;
-    int chrBufIndex= c->chrBufIndex;
-    int lastInLumBuf= c->lastInLumBuf;
-    int lastInChrBuf= c->lastInChrBuf;
-
-    if (isPacked(c->srcFormat)) {
-        src[0]=
-        src[1]=
-        src[2]=
-        src[3]= src[0];
-        srcStride[0]=
-        srcStride[1]=
-        srcStride[2]=
-        srcStride[3]= srcStride[0];
-    }
-    srcStride[1]<<= c->vChrDrop;
-    srcStride[2]<<= c->vChrDrop;
-
-    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
-                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
-                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
-    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
-                   srcSliceY,    srcSliceH,    dstY,    dstH);
-    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
-                   vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
-
-    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
-        static int warnedAlready=0; //FIXME move this into the context perhaps
-        if (flags & SWS_PRINT_INFO && !warnedAlready) {
-            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
-                   "         ->cannot do aligned memory accesses anymore\n");
-            warnedAlready=1;
-        }
-    }
-
-    /* Note the user might start scaling the picture in the middle so this
-       will not get executed. This is not really intended but works
-       currently, so people might do it. */
-    if (srcSliceY ==0) {
-        lumBufIndex=-1;
-        chrBufIndex=-1;
-        dstY=0;
-        lastInLumBuf= -1;
-        lastInChrBuf= -1;
-    }
-
-    lastDstY= dstY;
-
-    for (;dstY < dstH; dstY++) {
-        unsigned char *dest =dst[0]+dstStride[0]*dstY;
-        const int chrDstY= dstY>>c->chrDstVSubSample;
-        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
-        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
-        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
-
-        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
-        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
-        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
-        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
-        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
-        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
-        int enough_lines;
-
-        //handle holes (FAST_BILINEAR & weird filters)
-        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
-        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
-        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
-        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
-
-        DEBUG_BUFFERS("dstY: %d\n", dstY);
-        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
-                         firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
-        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
-                         firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
-
-        // Do we have enough lines in this slice to output the dstY line
-        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
-
-        if (!enough_lines) {
-            lastLumSrcY = srcSliceY + srcSliceH - 1;
-            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
-            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
-                                            lastLumSrcY, lastChrSrcY);
-        }
-
-        //Do horizontal scaling
-        while(lastInLumBuf < lastLumSrcY) {
-            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
-            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
-            lumBufIndex++;
-            assert(lumBufIndex < 2*vLumBufSize);
-            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
-            assert(lastInLumBuf + 1 - srcSliceY >= 0);
-            RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
-                            hLumFilter, hLumFilterPos, hLumFilterSize,
-                            formatConvBuffer,
-                            pal, 0);
-            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
-                RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
-                                hLumFilter, hLumFilterPos, hLumFilterSize,
-                                formatConvBuffer,
-                                pal, 1);
-            lastInLumBuf++;
-            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
-                               lumBufIndex,    lastInLumBuf);
-        }
-        while(lastInChrBuf < lastChrSrcY) {
-            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
-            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
-            chrBufIndex++;
-            assert(chrBufIndex < 2*vChrBufSize);
-            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
-            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
-            //FIXME replace parameters through context struct (some at least)
-
-            if (c->needs_hcscale)
-                RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
-                                hChrFilter, hChrFilterPos, hChrFilterSize,
-                                formatConvBuffer,
-                                pal);
-            lastInChrBuf++;
-            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
-                               chrBufIndex,    lastInChrBuf);
-        }
-        //wrap buf index around to stay inside the ring buffer
-        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
-        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
-        if (!enough_lines)
-            break; //we can't output a dstY line so let's try with the next slice
-
-        if (dstY < dstH-2) {
-            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                c->yuv2nv12X(c,
-                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
-            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16inC(
-                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
-                                  dstFormat);
-                } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
-                    const int16_t *lumBuf = lumSrcPtr[0];
-                    const int16_t *chrBuf= chrSrcPtr[0];
-                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
-                    c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
-                } else { //General YV12
-                    c->yuv2yuvX(c,
-                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
-                }
-            } else {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
-                if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
-                    int chrAlpha= vChrFilter[2*dstY+1];
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
-                                       alpPixBuf ? *alpSrcPtr : NULL,
-                                       dest, dstW, chrAlpha, dstFormat, flags, dstY);
-                    }
-                } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
-                    int lumAlpha= vLumFilter[2*dstY+1];
-                    int chrAlpha= vChrFilter[2*dstY+1];
-                    lumMmxFilter[2]=
-                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
-                    chrMmxFilter[2]=
-                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
-                                       alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
-                                       dest, dstW, lumAlpha, chrAlpha, dstY);
-                    }
-                } else { //general RGB
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c,
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packedX(c,
-                                       vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                       vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                       alpSrcPtr, dest, dstW, dstY);
-                    }
-                }
-            }
-        } else { // hmm looks like we can't use MMX here without overwriting this array's tail
-            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                yuv2nv12XinC(
-                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
-            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16inC(
-                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
-                                  dstFormat);
-                } else {
-                    yuv2yuvXinC(
-                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
-                }
-            } else {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
-                if(flags & SWS_FULL_CHR_H_INT) {
-                    yuv2rgbXinC_full(c,
-                                     vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                     vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                     alpSrcPtr, dest, dstW, dstY);
-                } else {
-                    yuv2packedXinC(c,
-                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                   vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                   alpSrcPtr, dest, dstW, dstY);
-                }
-            }
-        }
-    }
-
-    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
-        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
-
-    /* store changed local vars back in the context */
-    c->dstY= dstY;
-    c->lumBufIndex= lumBufIndex;
-    c->chrBufIndex= chrBufIndex;
-    c->lastInLumBuf= lastInLumBuf;
-    c->lastInChrBuf= lastInChrBuf;
-
-    return dstY - lastDstY;
-}
+#endif
 
 static void RENAME(sws_init_swScale)(SwsContext *c)
 {
     enum PixelFormat srcFormat = c->srcFormat;
 
-    c->yuv2nv12X    = RENAME(yuv2nv12X   );
-    c->yuv2yuv1     = RENAME(yuv2yuv1    );
     c->yuv2yuvX     = RENAME(yuv2yuvX    );
-    c->yuv2packed1  = RENAME(yuv2packed1 );
-    c->yuv2packed2  = RENAME(yuv2packed2 );
     c->yuv2packedX  = RENAME(yuv2packedX );
-
-    c->hScale       = RENAME(hScale      );
-
-    if (c->flags & SWS_FAST_BILINEAR)
-    {
-        c->hyscale_fast = RENAME(hyscale_fast);
-        c->hcscale_fast = RENAME(hcscale_fast);
-    }
-
-    switch(srcFormat) {
-        case PIX_FMT_YUYV422  : c->chrToYV12 = RENAME(yuy2ToUV); break;
-        case PIX_FMT_UYVY422  : c->chrToYV12 = RENAME(uyvyToUV); break;
-        case PIX_FMT_NV12     : c->chrToYV12 = RENAME(nv12ToUV); break;
-        case PIX_FMT_NV21     : c->chrToYV12 = RENAME(nv21ToUV); break;
-        case PIX_FMT_YUV420P16BE:
-        case PIX_FMT_YUV422P16BE:
-        case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break;
-        case PIX_FMT_YUV420P16LE:
-        case PIX_FMT_YUV422P16LE:
-        case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break;
-    }
-    if (c->chrSrcHSubSample) {
-        switch(srcFormat) {
-        case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV_half); break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV_half); break;
-        }
-    } else {
-        switch(srcFormat) {
-        case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV); break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV); break;
-        }
-    }
-
-    switch (srcFormat) {
-    case PIX_FMT_YUYV422  :
-    case PIX_FMT_YUV420P16BE:
-    case PIX_FMT_YUV422P16BE:
-    case PIX_FMT_YUV444P16BE:
-    case PIX_FMT_Y400A    :
-    case PIX_FMT_GRAY16BE : c->lumToYV12 = RENAME(yuy2ToY); break;
-    case PIX_FMT_UYVY422  :
-    case PIX_FMT_YUV420P16LE:
-    case PIX_FMT_YUV422P16LE:
-    case PIX_FMT_YUV444P16LE:
-    case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break;
-    case PIX_FMT_BGR24    : c->lumToYV12 = RENAME(bgr24ToY); break;
-    case PIX_FMT_RGB24    : c->lumToYV12 = RENAME(rgb24ToY); break;
-    }
-    if (c->alpPixBuf) {
-        switch (srcFormat) {
-        case PIX_FMT_Y400A  : c->alpToYV12 = RENAME(yuy2ToY); break;
-        }
-    }
-
-    switch (srcFormat) {
-    case PIX_FMT_Y400A  :
-        c->alpSrcOffset = 1;
-        break;
-    case PIX_FMT_RGB32  :
-    case PIX_FMT_BGR32  :
-        c->alpSrcOffset = 3;
-        break;
-    case PIX_FMT_RGB48LE:
-        c->lumSrcOffset = 1;
-        c->chrSrcOffset = 1;
-        c->alpSrcOffset = 1;
-        break;
-    }
-
-    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
-        if (c->srcRange) {
-            c->lumConvertRange = RENAME(lumRangeFromJpeg);
-            c->chrConvertRange = RENAME(chrRangeFromJpeg);
-        } else {
-            c->lumConvertRange = RENAME(lumRangeToJpeg);
-            c->chrConvertRange = RENAME(chrRangeToJpeg);
-        }
-    }
-
-    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
-          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
-        c->needs_hcscale = 1;
 }
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 749a276e5a..4977a3a736 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1250,10 +1250,8 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
     } else
 #endif
 #if HAVE_ALTIVEC
-    if (cpu_flags & AV_CPU_FLAG_ALTIVEC) {
+    if (cpu_flags & AV_CPU_FLAG_ALTIVEC)
         sws_init_swScale_altivec(c);
-        return swScale_altivec;
-    } else
 #endif
 
     return swScale_c;

From 1dd4f4be5a82409a162b42b120fc5054fef9d899 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 10:19:41 -0400
Subject: [PATCH 236/830] swscale: remove AMD3DNOW "optimizations".

The functions are identical to their MMX counterparts. Thus,
pretending that swscale is highly optimized for AMD3DNOW
extensions is a poorly executed practical joke at best.
---
 libswscale/swscale.c              | 22 ----------------------
 libswscale/x86/swscale_template.c | 15 ++-------------
 2 files changed, 2 insertions(+), 35 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 4977a3a736..5b71a6f1c0 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1178,7 +1178,6 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 //Plain C versions
 
 #define COMPILE_TEMPLATE_MMX2 0
-#define COMPILE_TEMPLATE_AMD3DNOW 0
 #define COMPILE_TEMPLATE_ALTIVEC 0
 
 #include "swscale_template.c"
@@ -1195,9 +1194,7 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 #if HAVE_MMX
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMX2
-#undef COMPILE_TEMPLATE_AMD3DNOW
 #define COMPILE_TEMPLATE_MMX2 0
-#define COMPILE_TEMPLATE_AMD3DNOW 0
 #define RENAME(a) a ## _MMX
 #include "x86/swscale_template.c"
 #endif
@@ -1206,24 +1203,11 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 #if HAVE_MMX2
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMX2
-#undef COMPILE_TEMPLATE_AMD3DNOW
 #define COMPILE_TEMPLATE_MMX2 1
-#define COMPILE_TEMPLATE_AMD3DNOW 0
 #define RENAME(a) a ## _MMX2
 #include "x86/swscale_template.c"
 #endif
 
-//3DNOW versions
-#if HAVE_AMD3DNOW
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMX2 0
-#define COMPILE_TEMPLATE_AMD3DNOW 1
-#define RENAME(a) a ## _3DNow
-#include "x86/swscale_template.c"
-#endif
-
 SwsFunc ff_getSwsFunc(SwsContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -1237,12 +1221,6 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
         return swScale_MMX2;
     } else
 #endif
-#if HAVE_AMD3DNOW
-    if (cpu_flags & AV_CPU_FLAG_3DNOW) {
-        sws_init_swScale_3DNow(c);
-        return swScale_3DNow;
-    } else
-#endif
 #if HAVE_MMX
     if (cpu_flags & AV_CPU_FLAG_MMX) {
         sws_init_swScale_MMX(c);
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index d0671315ae..63e0f72cb2 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -22,23 +22,14 @@
 
 #undef REAL_MOVNTQ
 #undef MOVNTQ
-#undef PAVGB
 #undef PREFETCH
 
-#if COMPILE_TEMPLATE_AMD3DNOW
-#define PREFETCH  "prefetch"
-#elif COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMX2
 #define PREFETCH "prefetchnta"
 #else
 #define PREFETCH  " # nop"
 #endif
 
-#if COMPILE_TEMPLATE_MMX2
-#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif COMPILE_TEMPLATE_AMD3DNOW
-#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
-#endif
-
 #if COMPILE_TEMPLATE_MMX2
 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
 #else
@@ -2559,9 +2550,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
 
     if (COMPILE_TEMPLATE_MMX2)      __asm__ volatile("sfence":::"memory");
-    /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-    if (COMPILE_TEMPLATE_AMD3DNOW)  __asm__ volatile("femms" :::"memory");
-    else                            __asm__ volatile("emms"  :::"memory");
+    __asm__ volatile("emms"  :::"memory");
 
     /* store changed local vars back in the context */
     c->dstY= dstY;

From b9eb2136af4e9b6700437d996eae731ffca07702 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 10:46:40 -0400
Subject: [PATCH 237/830] swscale: remove dead macro WRITEBGR24OLD.

---
 libswscale/x86/swscale_template.c | 56 -------------------------------
 1 file changed, 56 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 63e0f72cb2..e03fbd44e2 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -700,62 +700,6 @@
     " jb             1b             \n\t"
 #define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
 
-#define WRITEBGR24OLD(dst, dstw, index) \
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
-    "movq      %%mm2, %%mm1             \n\t" /* B */\
-    "movq      %%mm5, %%mm6             \n\t" /* R */\
-    "punpcklbw %%mm4, %%mm2             \n\t" /* GBGBGBGB 0 */\
-    "punpcklbw %%mm7, %%mm5             \n\t" /* 0R0R0R0R 0 */\
-    "punpckhbw %%mm4, %%mm1             \n\t" /* GBGBGBGB 2 */\
-    "punpckhbw %%mm7, %%mm6             \n\t" /* 0R0R0R0R 2 */\
-    "movq      %%mm2, %%mm0             \n\t" /* GBGBGBGB 0 */\
-    "movq      %%mm1, %%mm3             \n\t" /* GBGBGBGB 2 */\
-    "punpcklwd %%mm5, %%mm0             \n\t" /* 0RGB0RGB 0 */\
-    "punpckhwd %%mm5, %%mm2             \n\t" /* 0RGB0RGB 1 */\
-    "punpcklwd %%mm6, %%mm1             \n\t" /* 0RGB0RGB 2 */\
-    "punpckhwd %%mm6, %%mm3             \n\t" /* 0RGB0RGB 3 */\
-\
-    "movq      %%mm0, %%mm4             \n\t" /* 0RGB0RGB 0 */\
-    "psrlq        $8, %%mm0             \n\t" /* 00RGB0RG 0 */\
-    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 0 */\
-    "pand "MANGLE(bm11111000)", %%mm0   \n\t" /* 00RGB000 0.5 */\
-    "por       %%mm4, %%mm0             \n\t" /* 00RGBRGB 0 */\
-    "movq      %%mm2, %%mm4             \n\t" /* 0RGB0RGB 1 */\
-    "psllq       $48, %%mm2             \n\t" /* GB000000 1 */\
-    "por       %%mm2, %%mm0             \n\t" /* GBRGBRGB 0 */\
-\
-    "movq      %%mm4, %%mm2             \n\t" /* 0RGB0RGB 1 */\
-    "psrld       $16, %%mm4             \n\t" /* 000R000R 1 */\
-    "psrlq       $24, %%mm2             \n\t" /* 0000RGB0 1.5 */\
-    "por       %%mm4, %%mm2             \n\t" /* 000RRGBR 1 */\
-    "pand "MANGLE(bm00001111)", %%mm2   \n\t" /* 0000RGBR 1 */\
-    "movq      %%mm1, %%mm4             \n\t" /* 0RGB0RGB 2 */\
-    "psrlq        $8, %%mm1             \n\t" /* 00RGB0RG 2 */\
-    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 2 */\
-    "pand "MANGLE(bm11111000)", %%mm1   \n\t" /* 00RGB000 2.5 */\
-    "por       %%mm4, %%mm1             \n\t" /* 00RGBRGB 2 */\
-    "movq      %%mm1, %%mm4             \n\t" /* 00RGBRGB 2 */\
-    "psllq       $32, %%mm1             \n\t" /* BRGB0000 2 */\
-    "por       %%mm1, %%mm2             \n\t" /* BRGBRGBR 1 */\
-\
-    "psrlq       $32, %%mm4             \n\t" /* 000000RG 2.5 */\
-    "movq      %%mm3, %%mm5             \n\t" /* 0RGB0RGB 3 */\
-    "psrlq        $8, %%mm3             \n\t" /* 00RGB0RG 3 */\
-    "pand "MANGLE(bm00000111)", %%mm5   \n\t" /* 00000RGB 3 */\
-    "pand "MANGLE(bm11111000)", %%mm3   \n\t" /* 00RGB000 3.5 */\
-    "por       %%mm5, %%mm3             \n\t" /* 00RGBRGB 3 */\
-    "psllq       $16, %%mm3             \n\t" /* RGBRGB00 3 */\
-    "por       %%mm4, %%mm3             \n\t" /* RGBRGBRG 2.5 */\
-\
-    MOVNTQ(%%mm0,   (dst))\
-    MOVNTQ(%%mm2,  8(dst))\
-    MOVNTQ(%%mm3, 16(dst))\
-    "add         $24, "#dst"            \n\t"\
-\
-    "add          $8, "#index"          \n\t"\
-    "cmp     "#dstw", "#index"          \n\t"\
-    " jb          1b                    \n\t"
-
 #define WRITEBGR24MMX(dst, dstw, index) \
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
     "movq      %%mm2, %%mm1     \n\t" /* B */\

From ab088f7d2842b9c658a8906062a96be37fbc9981 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Mon, 21 Mar 2011 12:04:10 +0100
Subject: [PATCH 238/830] ape: Allow demuxing of files with metadata tags.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/ape.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavformat/ape.c b/libavformat/ape.c
index 956036d5a8..0bc7737fde 100644
--- a/libavformat/ape.c
+++ b/libavformat/ape.c
@@ -159,8 +159,8 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap)
     int total_blocks;
     int64_t pts;
 
-    /* TODO: Skip any leading junk such as id3v2 tags */
-    ape->junklength = 0;
+    /* Skip any leading junk such as id3v2 tags */
+    ape->junklength = avio_tell(pb);
 
     tag = avio_rl32(pb);
     if (tag != MKTAG('M', 'A', 'C', ' '))
@@ -276,7 +276,7 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap)
     ape->frames[0].nblocks = ape->blocksperframe;
     ape->frames[0].skip    = 0;
     for (i = 1; i < ape->totalframes; i++) {
-        ape->frames[i].pos      = ape->seektable[i]; //ape->frames[i-1].pos + ape->blocksperframe;
+        ape->frames[i].pos      = ape->seektable[i] + ape->junklength;
         ape->frames[i].nblocks  = ape->blocksperframe;
         ape->frames[i - 1].size = ape->frames[i].pos - ape->frames[i - 1].pos;
         ape->frames[i].skip     = (ape->frames[i].pos - ape->frames[0].pos) & 3;

From 676eaf84335fa91ee6a699fc03207762b8a7d6ae Mon Sep 17 00:00:00 2001
From: Alexandre Colucci <alexandre@elgato.com>
Date: Fri, 25 Mar 2011 11:25:02 +0100
Subject: [PATCH 239/830] dvdsubdec: fix incorrect colors.

On DVD and HD-DVD, colors are stored in the order YCrCb (and not YCbCr), as mentioned in the specifications:
see DVD Specifications for Read-Only Disc / Part 3, 4.3 Program Chain Information (7) PGC_SP_PLT
see DVD Specifications for High Definition Disc, 5.2 Navigation for Standard Content (11) PGC_SDSP_PLT
see DVD Specifications for High Definition Disc, 5.2 Navigation for Standard Content (12) PGC_HDSP_PLT
see DVD Specifications for High Definition Disc, 5.5 Presentation Data (4) SET_COLOR2

When decoding a DVD or HD-DVD subtitle, the colors were incorrectly set.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/dvdsubdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/dvdsubdec.c b/libavcodec/dvdsubdec.c
index 87eb53baaa..8f3ba63229 100644
--- a/libavcodec/dvdsubdec.c
+++ b/libavcodec/dvdsubdec.c
@@ -34,8 +34,8 @@ static void yuv_a_to_rgba(const uint8_t *ycbcr, const uint8_t *alpha, uint32_t *
 
     for (i = num_values; i > 0; i--) {
         y = *ycbcr++;
-        cb = *ycbcr++;
         cr = *ycbcr++;
+        cb = *ycbcr++;
         YUV_TO_RGB1_CCIR(cb, cr);
         YUV_TO_RGB2_CCIR(r, g, b, y);
         *rgba++ = (*alpha++ << 24) | (r << 16) | (g << 8) | b;

From d980d7b1295290cedd978ef53118513838aa1484 Mon Sep 17 00:00:00 2001
From: Alexandre Colucci <alexandre@elgato.com>
Date: Fri, 25 Mar 2011 17:31:28 +0100
Subject: [PATCH 240/830] pgssubdec: fix incorrect colors.

On Blu-ray, colors are stored in the order YCrCb (and not YCbCr), as mentioned in the specifications:
see System Description Blu-ray Disc Read-Only Format, 9.14.4.2.2.1 Palette Definition Segment

When decoding a Blu-ray subtitle, the colors were incorrectly set.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/pgssubdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
index ea53e55b35..a480da1168 100644
--- a/libavcodec/pgssubdec.c
+++ b/libavcodec/pgssubdec.c
@@ -246,8 +246,8 @@ static void parse_palette_segment(AVCodecContext *avctx,
     while (buf < buf_end) {
         color_id  = bytestream_get_byte(&buf);
         y         = bytestream_get_byte(&buf);
-        cb        = bytestream_get_byte(&buf);
         cr        = bytestream_get_byte(&buf);
+        cb        = bytestream_get_byte(&buf);
         alpha     = bytestream_get_byte(&buf);
 
         YUV_TO_RGB1(cb, cr);

From 4515f9b58a51eb0af81c1ed1fd99889fcdc5ae91 Mon Sep 17 00:00:00 2001
From: Ilya <ilyaDOTilba@gmail.com>
Date: Sat, 26 Mar 2011 17:13:36 +0100
Subject: [PATCH 241/830] rtsp: use strtoul to parse rtptime and seq values.

strtol could return negative values, leading to various error messages,
mainly "non-monotonically increasing dts".

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/rtsp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index 2ebf7e0510..2d1438d3ed 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -744,9 +744,9 @@ static void rtsp_parse_rtp_info(RTSPState *rt, const char *p)
         if (!strcmp(key, "url"))
             av_strlcpy(url, value, sizeof(url));
         else if (!strcmp(key, "seq"))
-            seq = strtol(value, NULL, 10);
+            seq = strtoul(value, NULL, 10);
         else if (!strcmp(key, "rtptime"))
-            rtptime = strtol(value, NULL, 10);
+            rtptime = strtoul(value, NULL, 10);
         if (*p == ',') {
             handle_rtp_info(rt, url, seq, rtptime);
             url[0] = '\0';

From 7ca5338b49aa9967c9b18503490ca656e3bd6c5d Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 24 May 2011 01:17:25 +0200
Subject: [PATCH 242/830] tiff: set palette in the context when specified in
 TIFF_PAL tag

Since image initialization was moved after tag parsing, the
palette needs to be specified in the context and then copied
to the allocated image in init_image().

Fixes a regression with TIFF images that have palette data,
trac issue #230, file Test_Flate_8bpp.tif.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/tiff.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index ffa5c6837a..bea353275f 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -41,6 +41,8 @@ typedef struct TiffContext {
 
     int width, height;
     unsigned int bpp, bppcount;
+    uint32_t palette[256];
+    int palette_is_set;
     int le;
     enum TiffCompr compr;
     int invert;
@@ -257,11 +259,15 @@ static int init_image(TiffContext *s)
         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         return ret;
     }
-    if (s->bpp == 8 && s->picture.data[1]){
-        /* make default grayscale pal */
-        pal = (uint32_t *) s->picture.data[1];
-        for (i = 0; i < 256; i++)
-            pal[i] = i * 0x010101;
+    if (s->avctx->pix_fmt == PIX_FMT_PAL8) {
+        if (s->palette_is_set) {
+            memcpy(s->picture.data[1], s->palette, sizeof(s->palette));
+        } else {
+            /* make default grayscale pal */
+            pal = (uint32_t *) s->picture.data[1];
+            for (i = 0; i < 256; i++)
+                pal[i] = i * 0x010101;
+        }
     }
     return 0;
 }
@@ -444,11 +450,7 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
         s->fill_order = value - 1;
         break;
     case TIFF_PAL:
-        if(s->avctx->pix_fmt != PIX_FMT_PAL8){
-            av_log(s->avctx, AV_LOG_ERROR, "Palette met but this is not palettized format\n");
-            return -1;
-        }
-        pal = (uint32_t *) s->picture.data[1];
+        pal = (uint32_t *) s->palette;
         off = type_sizes[type];
         rp = buf;
         gp = buf + count / 3 * off;
@@ -460,6 +462,7 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
             j |= tget(&bp, type, s->le) >> off;
             pal[i] = j;
         }
+        s->palette_is_set = 1;
         break;
     case TIFF_PLANAR:
         if(value == 2){

From e9735572113ab903b92cc0a7931eb894e7177f6e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 13:04:38 -0400
Subject: [PATCH 243/830] Move emms_c() from libavcodec to libavutil.

---
 libavcodec/dsputil.h | 20 +-------------------
 libavutil/internal.h | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 78d21527a5..74230cadbb 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -630,13 +630,6 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
     }
 }
 
-/**
- * Empty mmx state.
- * this must be called between any dsp function and float/double code.
- * for example sin(); dsp->idct_put(); emms_c(); cos()
- */
-#define emms_c()
-
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
@@ -654,18 +647,7 @@ void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
 void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
 void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
 
-#if HAVE_MMX
-
-#undef emms_c
-
-static inline void emms(void)
-{
-    __asm__ volatile ("emms;":::"memory");
-}
-
-#define emms_c() emms()
-
-#elif ARCH_ARM
+#if ARCH_ARM
 
 #if HAVE_NEON
 #   define STRIDE_ALIGN 16
diff --git a/libavutil/internal.h b/libavutil/internal.h
index e890ae7083..51e449d3fe 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -223,4 +223,18 @@
 #   define ONLY_IF_THREADS_ENABLED(x) NULL
 #endif
 
+#if HAVE_MMX
+/**
+ * Empty mmx state.
+ * this must be called between any dsp function and float/double code.
+ * for example sin(); dsp->idct_put(); emms_c(); cos()
+ */
+static av_always_inline void emms_c(void)
+{
+    __asm__ volatile ("emms" ::: "memory");
+}
+#else /* HAVE_MMX */
+#define emms_c()
+#endif /* HAVE_MMX */
+
 #endif /* AVUTIL_INTERNAL_H */

From c4fd283a467031fdbde5bca0963b20d5911eca91 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 13:04:46 -0400
Subject: [PATCH 244/830] swscale: use emms_c().

---
 libswscale/utils.c                | 6 ++----
 libswscale/x86/swscale_template.c | 2 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/libswscale/utils.c b/libswscale/utils.c
index b49ec89be8..29fc975046 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -193,8 +193,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
     const int64_t fone= 1LL<<54;
     int ret= -1;
 
-    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX)
-        __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
+    emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions)
 
     // NOTE: the +1 is for the MMX scaler which reads over the end
     FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+1)*sizeof(int16_t), fail);
@@ -757,8 +756,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 
     cpu_flags = av_get_cpu_flags();
     flags     = c->flags;
-    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX)
-        __asm__ volatile("emms\n\t"::: "memory");
+    emms_c();
     if (!rgb15to16) sws_rgb2rgb_init();
 
     unscaled = (srcW == dstW && srcH == dstH);
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index e03fbd44e2..576e22c082 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2494,7 +2494,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
 
     if (COMPILE_TEMPLATE_MMX2)      __asm__ volatile("sfence":::"memory");
-    __asm__ volatile("emms"  :::"memory");
+    emms_c();
 
     /* store changed local vars back in the context */
     c->dstY= dstY;

From aaca69c130edf219110c616d2e236a3a27caf706 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 12:43:29 -0400
Subject: [PATCH 245/830] swscale: remove swScale_{c,MMX,MMX2} duplication.

---
 libswscale/swscale.c              |  15 +-
 libswscale/swscale_template.c     |  10 +
 libswscale/x86/swscale_template.c | 313 ++----------------------------
 3 files changed, 32 insertions(+), 306 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 5b71a6f1c0..36b676bf06 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1214,18 +1214,13 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
 
     sws_init_swScale_c(c);
 
-#if HAVE_MMX2
-    // ordered per speed fastest first
-    if (cpu_flags & AV_CPU_FLAG_MMX2) {
-        sws_init_swScale_MMX2(c);
-        return swScale_MMX2;
-    } else
-#endif
 #if HAVE_MMX
-    if (cpu_flags & AV_CPU_FLAG_MMX) {
+    if (cpu_flags & AV_CPU_FLAG_MMX)
         sws_init_swScale_MMX(c);
-        return swScale_MMX;
-    } else
+#endif
+#if HAVE_MMX2
+    if (cpu_flags & AV_CPU_FLAG_MMX2)
+        sws_init_swScale_MMX2(c);
 #endif
 #if HAVE_ALTIVEC
     if (cpu_flags & AV_CPU_FLAG_ALTIVEC)
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index fe872561cb..0ff402806e 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -485,6 +485,11 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
 #define DEBUG_SWSCALE_BUFFERS 0
 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
 
+#if HAVE_MMX
+static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
+                                  int lastInLumBuf, int lastInChrBuf);
+#endif
+
 static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                      int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
 {
@@ -656,6 +661,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
         if (!enough_lines)
             break; //we can't output a dstY line so let's try with the next slice
 
+        if (HAVE_MMX) updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
         if (dstY < dstH-2) {
             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
             const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
@@ -780,6 +786,10 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
 
+    if (HAVE_MMX2 && av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
+        __asm__ volatile("sfence":::"memory");
+    emms_c();
+
     /* store changed local vars back in the context */
     c->dstY= dstY;
     c->lumBufIndex= lumBufIndex;
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 576e22c082..37a6bb5a0d 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2144,179 +2144,29 @@ which is needed to support GCC 4.0. */
 #endif
 }
 
-#define DEBUG_SWSCALE_BUFFERS 0
-#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
-
-static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                           int srcSliceH, uint8_t* dst[], int dstStride[])
+#if !COMPILE_TEMPLATE_MMX2
+static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
+                                  int lastInLumBuf, int lastInChrBuf)
 {
-    /* load a few things into local vars to make the code more readable? and faster */
-    const int srcW= c->srcW;
-    const int dstW= c->dstW;
     const int dstH= c->dstH;
-    const int chrDstW= c->chrDstW;
-    const int chrSrcW= c->chrSrcW;
-    const int lumXInc= c->lumXInc;
-    const int chrXInc= c->chrXInc;
-    const enum PixelFormat dstFormat= c->dstFormat;
     const int flags= c->flags;
-    int16_t *vLumFilterPos= c->vLumFilterPos;
-    int16_t *vChrFilterPos= c->vChrFilterPos;
-    int16_t *hLumFilterPos= c->hLumFilterPos;
-    int16_t *hChrFilterPos= c->hChrFilterPos;
-    int16_t *vLumFilter= c->vLumFilter;
-    int16_t *vChrFilter= c->vChrFilter;
-    int16_t *hLumFilter= c->hLumFilter;
-    int16_t *hChrFilter= c->hChrFilter;
-    int32_t *lumMmxFilter= c->lumMmxFilter;
-    int32_t *chrMmxFilter= c->chrMmxFilter;
-    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
-    const int vLumFilterSize= c->vLumFilterSize;
-    const int vChrFilterSize= c->vChrFilterSize;
-    const int hLumFilterSize= c->hLumFilterSize;
-    const int hChrFilterSize= c->hChrFilterSize;
     int16_t **lumPixBuf= c->lumPixBuf;
     int16_t **chrPixBuf= c->chrPixBuf;
     int16_t **alpPixBuf= c->alpPixBuf;
     const int vLumBufSize= c->vLumBufSize;
     const int vChrBufSize= c->vChrBufSize;
-    uint8_t *formatConvBuffer= c->formatConvBuffer;
-    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
-    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
-    int lastDstY;
-    uint32_t *pal=c->pal_yuv;
-
-    /* vars which will change and which we need to store back in the context */
-    int dstY= c->dstY;
-    int lumBufIndex= c->lumBufIndex;
-    int chrBufIndex= c->chrBufIndex;
-    int lastInLumBuf= c->lastInLumBuf;
-    int lastInChrBuf= c->lastInChrBuf;
-
-    if (isPacked(c->srcFormat)) {
-        src[0]=
-        src[1]=
-        src[2]=
-        src[3]= src[0];
-        srcStride[0]=
-        srcStride[1]=
-        srcStride[2]=
-        srcStride[3]= srcStride[0];
-    }
-    srcStride[1]<<= c->vChrDrop;
-    srcStride[2]<<= c->vChrDrop;
-
-    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
-                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
-                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
-    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
-                   srcSliceY,    srcSliceH,    dstY,    dstH);
-    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
-                   vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
-
-    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
-        static int warnedAlready=0; //FIXME move this into the context perhaps
-        if (flags & SWS_PRINT_INFO && !warnedAlready) {
-            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
-                   "         ->cannot do aligned memory accesses anymore\n");
-            warnedAlready=1;
-        }
-    }
-
-    /* Note the user might start scaling the picture in the middle so this
-       will not get executed. This is not really intended but works
-       currently, so people might do it. */
-    if (srcSliceY ==0) {
-        lumBufIndex=-1;
-        chrBufIndex=-1;
-        dstY=0;
-        lastInLumBuf= -1;
-        lastInChrBuf= -1;
-    }
-
-    lastDstY= dstY;
-
-    for (;dstY < dstH; dstY++) {
-        unsigned char *dest =dst[0]+dstStride[0]*dstY;
-        const int chrDstY= dstY>>c->chrDstVSubSample;
-        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
-        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
-        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
-
-        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
-        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
-        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
-        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
-        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
-        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
-        int enough_lines;
-
-        //handle holes (FAST_BILINEAR & weird filters)
-        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
-        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
-        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
-        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
-
-        DEBUG_BUFFERS("dstY: %d\n", dstY);
-        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
-                         firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
-        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
-                         firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
-
-        // Do we have enough lines in this slice to output the dstY line
-        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
-
-        if (!enough_lines) {
-            lastLumSrcY = srcSliceY + srcSliceH - 1;
-            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
-            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
-                                            lastLumSrcY, lastChrSrcY);
-        }
-
-        //Do horizontal scaling
-        while(lastInLumBuf < lastLumSrcY) {
-            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
-            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
-            lumBufIndex++;
-            assert(lumBufIndex < 2*vLumBufSize);
-            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
-            assert(lastInLumBuf + 1 - srcSliceY >= 0);
-            hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
-                            hLumFilter, hLumFilterPos, hLumFilterSize,
-                            formatConvBuffer,
-                            pal, 0);
-            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
-                hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
-                                hLumFilter, hLumFilterPos, hLumFilterSize,
-                                formatConvBuffer,
-                                pal, 1);
-            lastInLumBuf++;
-            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
-                               lumBufIndex,    lastInLumBuf);
-        }
-        while(lastInChrBuf < lastChrSrcY) {
-            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
-            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
-            chrBufIndex++;
-            assert(chrBufIndex < 2*vChrBufSize);
-            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
-            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
-            //FIXME replace parameters through context struct (some at least)
-
-            if (c->needs_hcscale)
-                hcscale_c(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
-                                hChrFilter, hChrFilterPos, hChrFilterSize,
-                                formatConvBuffer,
-                                pal);
-            lastInChrBuf++;
-            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
-                               chrBufIndex,    lastInChrBuf);
-        }
-        //wrap buf index around to stay inside the ring buffer
-        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
-        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
-        if (!enough_lines)
-            break; //we can't output a dstY line so let's try with the next slice
+    int16_t *vLumFilterPos= c->vLumFilterPos;
+    int16_t *vChrFilterPos= c->vChrFilterPos;
+    int16_t *vLumFilter= c->vLumFilter;
+    int16_t *vChrFilter= c->vChrFilter;
+    int32_t *lumMmxFilter= c->lumMmxFilter;
+    int32_t *chrMmxFilter= c->chrMmxFilter;
+    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
+    const int vLumFilterSize= c->vLumFilterSize;
+    const int vChrFilterSize= c->vChrFilterSize;
+    const int chrDstY= dstY>>c->chrDstVSubSample;
+    const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
+    const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
 
         c->blueDither= ff_dither8[dstY&1];
         if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
@@ -2324,7 +2174,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
         else
             c->greenDither= ff_dither4[dstY&1];
         c->redDither= ff_dither8[(dstY+1)&1];
-        if (dstY < dstH-2) {
+        if (dstY < dstH - 2) {
             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
             const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
@@ -2373,138 +2223,9 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
                         ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
                 }
             }
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                c->yuv2nv12X(c,
-                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
-            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16inC(
-                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
-                                  dstFormat);
-                } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
-                    const int16_t *lumBuf = lumSrcPtr[0];
-                    const int16_t *chrBuf= chrSrcPtr[0];
-                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
-                    c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
-                } else { //General YV12
-                    c->yuv2yuvX(c,
-                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
-                }
-            } else {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
-                if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
-                    int chrAlpha= vChrFilter[2*dstY+1];
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
-                                       alpPixBuf ? *alpSrcPtr : NULL,
-                                       dest, dstW, chrAlpha, dstFormat, flags, dstY);
-                    }
-                } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
-                    int lumAlpha= vLumFilter[2*dstY+1];
-                    int chrAlpha= vChrFilter[2*dstY+1];
-                    lumMmxFilter[2]=
-                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
-                    chrMmxFilter[2]=
-                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
-                                       alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
-                                       dest, dstW, lumAlpha, chrAlpha, dstY);
-                    }
-                } else { //general RGB
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c,
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packedX(c,
-                                       vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                       vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                       alpSrcPtr, dest, dstW, dstY);
-                    }
-                }
-            }
-        } else { // hmm looks like we can't use MMX here without overwriting this array's tail
-            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                yuv2nv12XinC(
-                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
-            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16inC(
-                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
-                                  dstFormat);
-                } else {
-                    yuv2yuvXinC(
-                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
-                }
-            } else {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
-                if(flags & SWS_FULL_CHR_H_INT) {
-                    yuv2rgbXinC_full(c,
-                                     vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                     vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                     alpSrcPtr, dest, dstW, dstY);
-                } else {
-                    yuv2packedXinC(c,
-                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                   vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                   alpSrcPtr, dest, dstW, dstY);
-                }
-            }
         }
-    }
-
-    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
-        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
-
-    if (COMPILE_TEMPLATE_MMX2)      __asm__ volatile("sfence":::"memory");
-    emms_c();
-
-    /* store changed local vars back in the context */
-    c->dstY= dstY;
-    c->lumBufIndex= lumBufIndex;
-    c->chrBufIndex= chrBufIndex;
-    c->lastInLumBuf= lastInLumBuf;
-    c->lastInChrBuf= lastInChrBuf;
-
-    return dstY - lastDstY;
 }
+#endif /* !COMPILE_TEMPLATE_MMX2 */
 
 static void RENAME(sws_init_swScale)(SwsContext *c)
 {

From 566b5fbbb3753f71b60c4da491236a62232a8cf9 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 12:15:14 -0400
Subject: [PATCH 246/830] swscale: remove if(canMMX2BeUsed) conditional.

Instead, set function pointers conditionally during init. This
patch also reveals a whole branch of dead assembly code that is
therefore also removed.
---
 libswscale/x86/swscale_template.c | 99 +++----------------------------
 1 file changed, 8 insertions(+), 91 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 37a6bb5a0d..0bee76e664 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1918,19 +1918,11 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
     }
 }
 
-#define FAST_BILINEAR_X86 \
-    "subl    %%edi, %%esi    \n\t" /*  src[xx+1] - src[xx] */                   \
-    "imull   %%ecx, %%esi    \n\t" /* (src[xx+1] - src[xx])*xalpha */           \
-    "shll      $16, %%edi    \n\t"                                              \
-    "addl    %%edi, %%esi    \n\t" /* src[xx+1]*xalpha + src[xx]*(1-xalpha) */  \
-    "mov        %1, %%"REG_D"\n\t"                                              \
-    "shrl       $9, %%esi    \n\t"                                              \
-
+#if COMPILE_TEMPLATE_MMX2
 static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
                                         long dstWidth, const uint8_t *src, int srcW,
                                         int xInc)
 {
-#if COMPILE_TEMPLATE_MMX2
     int32_t *filterPos = c->hLumFilterPos;
     int16_t *filter    = c->hLumFilter;
     int     canMMX2BeUsed  = c->canMMX2BeUsed;
@@ -1939,7 +1931,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
 #if defined(PIC)
     DECLARE_ALIGNED(8, uint64_t, ebxsave);
 #endif
-    if (canMMX2BeUsed) {
+
         __asm__ volatile(
 #if defined(PIC)
             "mov               %%"REG_b", %5        \n\t"
@@ -1998,51 +1990,12 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
 #endif
         );
         for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
-    } else {
-#endif /* COMPILE_TEMPLATE_MMX2 */
-    x86_reg xInc_shr16 = xInc >> 16;
-    uint16_t xInc_mask = xInc & 0xffff;
-    x86_reg dstWidth_reg = dstWidth;
-    //NO MMX just normal asm ...
-    __asm__ volatile(
-        "xor %%"REG_a", %%"REG_a"            \n\t" // i
-        "xor %%"REG_d", %%"REG_d"            \n\t" // xx
-        "xorl    %%ecx, %%ecx                \n\t" // xalpha
-        ".p2align                4           \n\t"
-        "1:                                  \n\t"
-        "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
-        "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
-        FAST_BILINEAR_X86
-        "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
-        "addw       %4, %%cx                 \n\t" //xalpha += xInc&0xFFFF
-        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
-
-        "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
-        "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
-        FAST_BILINEAR_X86
-        "movw     %%si, 2(%%"REG_D", %%"REG_a", 2)  \n\t"
-        "addw       %4, %%cx                 \n\t" //xalpha += xInc&0xFFFF
-        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
-
-
-        "add        $2, %%"REG_a"            \n\t"
-        "cmp        %2, %%"REG_a"            \n\t"
-        " jb        1b                       \n\t"
-
-
-        :: "r" (src), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask)
-        : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
-    );
-#if COMPILE_TEMPLATE_MMX2
-    } //if MMX2 can't be used
-#endif
 }
 
 static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
                                         long dstWidth, const uint8_t *src1,
                                         const uint8_t *src2, int srcW, int xInc)
 {
-#if COMPILE_TEMPLATE_MMX2
     int32_t *filterPos = c->hChrFilterPos;
     int16_t *filter    = c->hChrFilter;
     int     canMMX2BeUsed  = c->canMMX2BeUsed;
@@ -2051,7 +2004,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
 #if defined(PIC)
     DECLARE_ALIGNED(8, uint64_t, ebxsave);
 #endif
-    if (canMMX2BeUsed) {
+
         __asm__ volatile(
 #if defined(PIC)
             "mov          %%"REG_b", %6         \n\t"
@@ -2101,48 +2054,8 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
             dst[i] = src1[srcW-1]*128;
             dst[i+VOFW] = src2[srcW-1]*128;
         }
-    } else {
-#endif /* COMPILE_TEMPLATE_MMX2 */
-        x86_reg xInc_shr16 = (x86_reg) (xInc >> 16);
-        uint16_t xInc_mask = xInc & 0xffff;
-        x86_reg dstWidth_reg = dstWidth;
-        __asm__ volatile(
-            "xor %%"REG_a", %%"REG_a"               \n\t" // i
-            "xor %%"REG_d", %%"REG_d"               \n\t" // xx
-            "xorl    %%ecx, %%ecx                   \n\t" // xalpha
-            ".p2align    4                          \n\t"
-            "1:                                     \n\t"
-            "mov        %0, %%"REG_S"               \n\t"
-            "movzbl  (%%"REG_S", %%"REG_d"), %%edi  \n\t" //src[xx]
-            "movzbl 1(%%"REG_S", %%"REG_d"), %%esi  \n\t" //src[xx+1]
-            FAST_BILINEAR_X86
-            "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
-
-            "movzbl    (%5, %%"REG_d"), %%edi       \n\t" //src[xx]
-            "movzbl   1(%5, %%"REG_d"), %%esi       \n\t" //src[xx+1]
-            FAST_BILINEAR_X86
-            "movw     %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2)   \n\t"
-
-            "addw       %4, %%cx                    \n\t" //xalpha += xInc&0xFFFF
-            "adc        %3, %%"REG_d"               \n\t" //xx+= xInc>>16 + carry
-            "add        $1, %%"REG_a"               \n\t"
-            "cmp        %2, %%"REG_a"               \n\t"
-            " jb        1b                          \n\t"
-
-/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
-which is needed to support GCC 4.0. */
-#if ARCH_X86_64 && AV_GCC_VERSION_AT_LEAST(3,4)
-            :: "m" (src1), "m" (dst), "g" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask),
-#else
-            :: "m" (src1), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask),
-#endif
-            "r" (src2)
-            : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
-        );
-#if COMPILE_TEMPLATE_MMX2
-    } //if MMX2 can't be used
-#endif
 }
+#endif /* COMPILE_TEMPLATE_MMX2 */
 
 #if !COMPILE_TEMPLATE_MMX2
 static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
@@ -2240,14 +2153,18 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     c->hScale       = RENAME(hScale      );
 
     // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
+#if COMPILE_TEMPLATE_MMX2
     if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
     {
         c->hyscale_fast = RENAME(hyscale_fast);
         c->hcscale_fast = RENAME(hcscale_fast);
     } else {
+#endif /* COMPILE_TEMPLATE_MMX2 */
         c->hyscale_fast = NULL;
         c->hcscale_fast = NULL;
+#if COMPILE_TEMPLATE_MMX2
     }
+#endif /* COMPILE_TEMPLATE_MMX2 */
 
     switch(srcFormat) {
         case PIX_FMT_YUYV422  : c->chrToYV12 = RENAME(yuy2ToUV); break;

From 1bb0f0c925d616375b3b991d9c645e170b90c0a4 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 12:30:55 -0400
Subject: [PATCH 247/830] swscale: remove if(bitexact) branch from functions.

Instead, only set the function pointers if bitexact flag is
not set during initialization. Since a change in flags triggers
a re-init anyway, this doesn't break situations where flag values
change during runtime.
---
 libswscale/x86/swscale_template.c | 34 ++++++++++---------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 0bee76e664..678060f3de 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -831,7 +831,6 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
 {
-    if(!(c->flags & SWS_BITEXACT)) {
         if (c->flags & SWS_ACCURATE_RND) {
             if (uDest) {
                 YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
@@ -853,17 +852,11 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
 
             YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
         }
-        return;
-    }
-    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
-                chrFilter, chrSrc, chrFilterSize,
-                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
 }
 
 static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
 {
-    if(!(c->flags & SWS_BITEXACT)) {
         long p= 4;
         const uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
         uint8_t *dst[4]= {aDest, dest, uDest, vDest};
@@ -892,10 +885,6 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
                 }
             }
         }
-        return;
-    }
-    yuv2yuv1_c(c, lumSrc, chrSrc, alpSrc, dest, uDest, vDest, aDest,
-               dstW, chrDstW);
 }
 
 
@@ -908,7 +897,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    if(!(c->flags & SWS_BITEXACT)) {
+
         if (c->flags & SWS_ACCURATE_RND) {
             switch(c->dstFormat) {
             case PIX_FMT_RGB32:
@@ -1065,7 +1054,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                 return;
             }
         }
-    }
+
     yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
                    chrFilter, chrSrc, chrFilterSize,
                    alpSrc, dest, dstW, dstY);
@@ -1077,7 +1066,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
 static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                           const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
 {
-    if(!(c->flags & SWS_BITEXACT)) {
         switch(c->dstFormat) {
         //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
         case PIX_FMT_RGB32:
@@ -1208,9 +1196,8 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                 "a" (&c->redDither)
             );
             return;
-        default: break;
         }
-    }
+
     yuv2packed2_c(c, buf0, buf1, uvbuf0, uvbuf1, abuf0, abuf1,
                   dest, dstW, yalpha, uvalpha, y);
 }
@@ -1221,7 +1208,6 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
 static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                           const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
 {
-    if(!(flags & SWS_BITEXACT)) {
         const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
         if (flags&SWS_FULL_CHR_H_INT) {
@@ -1442,7 +1428,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                 return;
             }
         }
-    }
+
     yuv2packed1_c(c, buf0, uvbuf0, uvbuf1, abuf0, dest,
                   dstW, uvalpha, dstFormat, flags, y);
 }
@@ -2144,11 +2130,13 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
 {
     enum PixelFormat srcFormat = c->srcFormat;
 
-    c->yuv2yuv1     = RENAME(yuv2yuv1    );
-    c->yuv2yuvX     = RENAME(yuv2yuvX    );
-    c->yuv2packed1  = RENAME(yuv2packed1 );
-    c->yuv2packed2  = RENAME(yuv2packed2 );
-    c->yuv2packedX  = RENAME(yuv2packedX );
+    if (!(c->flags & SWS_BITEXACT)) {
+        c->yuv2yuv1     = RENAME(yuv2yuv1    );
+        c->yuv2yuvX     = RENAME(yuv2yuvX    );
+        c->yuv2packed1  = RENAME(yuv2packed1 );
+        c->yuv2packed2  = RENAME(yuv2packed2 );
+        c->yuv2packedX  = RENAME(yuv2packedX );
+    }
 
     c->hScale       = RENAME(hScale      );
 

From a724ee6265bbf0acf4c43cd948dede661459727d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 15:32:03 -0400
Subject: [PATCH 248/830] swscale: unbreak the build on non-x86 systems.

---
 libswscale/swscale_template.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 0ff402806e..cd6c8c7a65 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -661,7 +661,9 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
         if (!enough_lines)
             break; //we can't output a dstY line so let's try with the next slice
 
-        if (HAVE_MMX) updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
+#if HAVE_MMX
+        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
+#endif
         if (dstY < dstH-2) {
             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
             const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
@@ -786,8 +788,10 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
 
-    if (HAVE_MMX2 && av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
+#if HAVE_MMX2
+    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
         __asm__ volatile("sfence":::"memory");
+#endif
     emms_c();
 
     /* store changed local vars back in the context */

From 384d10360b163e881e30483f2e5467ac53a356d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Tue, 24 May 2011 22:24:08 +0200
Subject: [PATCH 249/830] Fix register types for LOAD_AB arguments, fixes
 compilation with NASM.

---
 libavcodec/x86/h264_deblock_10bit.asm | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index f5a13f1250..299f03d5cb 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -165,7 +165,7 @@ cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16)
     SUB        rsp, pad
     shl        r2d, 2
     shl        r3d, 2
-    LOAD_AB     m4, m5, r2, r3
+    LOAD_AB     m4, m5, r2d, r3d
     mov         r3, 32/mmsize
     mov         r2, r0
     sub         r0, r1
@@ -222,7 +222,7 @@ cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16)
     SUB        rsp, pad
     shl        r2d, 2
     shl        r3d, 2
-    LOAD_AB     m4, m5, r2, r3
+    LOAD_AB     m4, m5, r2d, r3d
     mov         r3, r1
     mova        am, m4
     add         r3, r1
@@ -352,7 +352,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15
     %define mask2 m11
     shl        r2d, 2
     shl        r3d, 2
-    LOAD_AB    m12, m13, r2, r3
+    LOAD_AB    m12, m13, r2d, r3d
     mov         r2, r0
     sub         r0, r1
     sub         r0, r1
@@ -380,7 +380,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15
 cglobal deblock_h_luma_10_%1, 5,7,15
     shl        r2d, 2
     shl        r3d, 2
-    LOAD_AB    m12, m13, r2, r3
+    LOAD_AB    m12, m13, r2d, r3d
     mov         r2, r1
     add         r2, r1
     add         r2, r1
@@ -851,7 +851,7 @@ cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16)
 .loop:
 %endif
     CHROMA_V_LOAD r5
-    LOAD_AB     m4, m5, r2, r3
+    LOAD_AB     m4, m5, r2d, r3d
     LOAD_MASK   m0, m1, m2, m3, m4, m5, m7, m6, m4
     pxor        m4, m4
     LOAD_TC     m6, r4
@@ -885,7 +885,7 @@ cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16)
 .loop:
 %endif
     CHROMA_V_LOAD r4
-    LOAD_AB     m4, m5, r2, r3
+    LOAD_AB     m4, m5, r2d, r3d
     LOAD_MASK   m0, m1, m2, m3, m4, m5, m7, m6, m4
     CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6
     CHROMA_V_STORE

From b44bbf908d000808b2480cd6e8ca571cd3846fd2 Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs <aurel@gnuage.org>
Date: Tue, 24 May 2011 23:26:24 +0200
Subject: [PATCH 250/830] matroskadec: fix reverted condition to accept
 combine_plane operation

---
 libavformat/matroskadec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index eca32773fe..569b95fbb2 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1518,7 +1518,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
             /* if we have virtual track, mark the real tracks */
             for (j=0; j < track->operation.combine_planes.nb_elem; j++) {
                 char buf[32];
-                if (planes[j].type < MATROSKA_VIDEO_STEREO_PLANE_COUNT)
+                if (planes[j].type >= MATROSKA_VIDEO_STEREO_PLANE_COUNT)
                     continue;
                 snprintf(buf, sizeof(buf), "%s_%d",
                          matroska_video_stereo_plane[planes[j].type], i);

From 09525f7956508c420a5c56c8daee16e05aa3f528 Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs <aurel@gnuage.org>
Date: Tue, 24 May 2011 23:34:24 +0200
Subject: [PATCH 251/830] matroska: improve declaration of video_stereo_*
 constant tables

---
 libavformat/matroska.c |  4 ++--
 libavformat/matroska.h | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavformat/matroska.c b/libavformat/matroska.c
index fe9b0424a3..52481d7556 100644
--- a/libavformat/matroska.c
+++ b/libavformat/matroska.c
@@ -100,7 +100,7 @@ const AVMetadataConv ff_mkv_metadata_conv[] = {
     { 0 }
 };
 
-const char const *matroska_video_stereo_mode[] = {
+const char * const matroska_video_stereo_mode[MATROSKA_VIDEO_STEREO_MODE_COUNT] = {
     "mono",
     "left_right",
     "bottom_top",
@@ -118,7 +118,7 @@ const char const *matroska_video_stereo_mode[] = {
     "block_rl",
 };
 
-const char const *matroska_video_stereo_plane[] = {
+const char * const matroska_video_stereo_plane[MATROSKA_VIDEO_STEREO_PLANE_COUNT] = {
     "left",
     "right",
     "background",
diff --git a/libavformat/matroska.h b/libavformat/matroska.h
index 949195b757..32b04c5a4b 100644
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h
@@ -240,13 +240,13 @@ typedef struct CodecMime{
 /* max. depth in the EBML tree structure */
 #define EBML_MAX_DEPTH 16
 
-extern const CodecTags ff_mkv_codec_tags[];
-extern const CodecMime ff_mkv_mime_tags[];
-extern const AVMetadataConv ff_mkv_metadata_conv[];
-extern const char const *matroska_video_stereo_mode[];
-extern const char const *matroska_video_stereo_plane[];
-
 #define MATROSKA_VIDEO_STEREO_MODE_COUNT  15
 #define MATROSKA_VIDEO_STEREO_PLANE_COUNT  3
 
+extern const CodecTags ff_mkv_codec_tags[];
+extern const CodecMime ff_mkv_mime_tags[];
+extern const AVMetadataConv ff_mkv_metadata_conv[];
+extern const char * const matroska_video_stereo_mode[MATROSKA_VIDEO_STEREO_MODE_COUNT];
+extern const char * const matroska_video_stereo_plane[MATROSKA_VIDEO_STEREO_PLANE_COUNT];
+
 #endif /* AVFORMAT_MATROSKA_H */

From 040e1c67edab57177adb88a7dd1cc9ed38d2b74e Mon Sep 17 00:00:00 2001
From: ami_stuff <ami_stuff@o2.pl>
Date: Tue, 24 May 2011 23:38:01 +0200
Subject: [PATCH 252/830] rawdec: fix decoding of QT WRAW files

Fix decoding of QT WRAW files.

Testing shows that:

1. all of the AVI/MOV WRAW files (at least from the link posted
   to the trac ticket #108) need to be flipped
2. mov WRAW files need to use AVI color modes
3. assigning PAL8 mode by default to WRAW codec is not correct

Fix decoding of file CarltonMovie2.mov, fix trac issue #108.
---
 libavcodec/raw.c    | 1 -
 libavcodec/rawdec.c | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/raw.c b/libavcodec/raw.c
index 7481b2fba9..0bc04df071 100644
--- a/libavcodec/raw.c
+++ b/libavcodec/raw.c
@@ -123,7 +123,6 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', '2') },
     { PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', 's') },
     { PIX_FMT_YUYV422, MKTAG('D', 'V', 'O', 'O') }, /* Digital Voodoo SD 8 Bit */
-    { PIX_FMT_PAL8,    MKTAG('W', 'R', 'A', 'W') },
     { PIX_FMT_RGB555LE,MKTAG('L', '5', '5', '5') },
     { PIX_FMT_RGB565LE,MKTAG('L', '5', '6', '5') },
     { PIX_FMT_RGB565BE,MKTAG('B', '5', '6', '5') },
diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index 6bf749f4c3..9702f98918 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -90,6 +90,8 @@ static av_cold int raw_init_decoder(AVCodecContext *avctx)
 
     if (avctx->codec_tag == MKTAG('r','a','w',' '))
         avctx->pix_fmt = ff_find_pix_fmt(pix_fmt_bps_mov, avctx->bits_per_coded_sample);
+    else if (avctx->codec_tag == MKTAG('W','R','A','W'))
+        avctx->pix_fmt = ff_find_pix_fmt(pix_fmt_bps_avi, avctx->bits_per_coded_sample);
     else if (avctx->codec_tag)
         avctx->pix_fmt = ff_find_pix_fmt(ff_raw_pix_fmt_tags, avctx->codec_tag);
     else if (avctx->pix_fmt == PIX_FMT_NONE && avctx->bits_per_coded_sample)
@@ -110,7 +112,7 @@ static av_cold int raw_init_decoder(AVCodecContext *avctx)
     avctx->coded_frame= &context->pic;
 
     if((avctx->extradata_size >= 9 && !memcmp(avctx->extradata + avctx->extradata_size - 9, "BottomUp", 9)) ||
-       avctx->codec_tag == MKTAG( 3 ,  0 ,  0 ,  0 ))
+        avctx->codec_tag == MKTAG(3, 0, 0, 0) || avctx->codec_tag == MKTAG('W','R','A','W'))
         context->flip=1;
 
     return 0;

From 9bbd6a4cd89da4bfc9fd36fea5777a539a542b40 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 24 May 2011 20:11:53 +0100
Subject: [PATCH 253/830] configure: enable memalign_hack automatically when
 needed

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 configure | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/configure b/configure
index 5e3d6fbe47..c628ff65bb 100755
--- a/configure
+++ b/configure
@@ -2823,11 +2823,6 @@ check_header X11/extensions/XvMClib.h
 
 check_struct dxva2api.h DXVA_PictureParameters wDecodedPictureIndex
 
-if ! enabled_any memalign memalign_hack posix_memalign malloc_aligned &&
-     enabled_any $need_memalign ; then
-    die "Error, no aligned memory allocator but SSE enabled, disable it or use --enable-memalign-hack."
-fi
-
 disabled  zlib || check_lib   zlib.h      zlibVersion -lz   || disable  zlib
 disabled bzlib || check_lib2 bzlib.h BZ2_bzlibVersion -lbz2 || disable bzlib
 
@@ -3091,6 +3086,9 @@ check_deps $CONFIG_LIST       \
 
 enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
 
+! enabled_any memalign posix_memalign malloc_aligned &&
+    enabled_any $need_memalign && enable memalign_hack
+
 echo "install prefix            $prefix"
 echo "source path               $source_path"
 echo "C compiler                $cc"

From 8050dfdfb284874e3989301771e8011422e78c7a Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 24 May 2011 17:48:41 +0200
Subject: [PATCH 254/830] swscale: Switch to Ronald's yuv2yuvX16inC_template();
 it's very similar to Baptiste's and supports alpha

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale.c | 46 +-------------------------------------------
 1 file changed, 1 insertion(+), 45 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 0bfa3537fd..c048180e1d 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -421,50 +421,6 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
     }
 }
 
-static av_always_inline void yuv2yuvXNinC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                                   const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                                   const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest,
-                                                   int dstW, int chrDstW, int big_endian, int depth)
-{
-    //FIXME Optimize (just quickly written not optimized..)
-    int i;
-
-    for (i = 0; i < dstW; i++) {
-        int val = 1 << (26-depth);
-        int j;
-
-        for (j = 0; j < lumFilterSize; j++)
-            val += lumSrc[j][i] * lumFilter[j];
-
-        if (big_endian) {
-            AV_WB16(&dest[i], av_clip(val >> (27-depth), 0, (1<<depth)-1));
-        } else {
-            AV_WL16(&dest[i], av_clip(val >> (27-depth), 0, (1<<depth)-1));
-        }
-    }
-
-    if (uDest) {
-        for (i = 0; i < chrDstW; i++) {
-            int u = 1 << (26-depth);
-            int v = 1 << (26-depth);
-            int j;
-
-            for (j = 0; j < chrFilterSize; j++) {
-                u += chrSrc[j][i       ] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
-            }
-
-            if (big_endian) {
-                AV_WB16(&uDest[i], av_clip(u >> (27-depth), 0, (1<<depth)-1));
-                AV_WB16(&vDest[i], av_clip(v >> (27-depth), 0, (1<<depth)-1));
-            } else {
-                AV_WL16(&uDest[i], av_clip(u >> (27-depth), 0, (1<<depth)-1));
-                AV_WL16(&vDest[i], av_clip(v >> (27-depth), 0, (1<<depth)-1));
-            }
-        }
-    }
-}
-
 static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                  const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                  const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
@@ -472,7 +428,7 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr
 {
     if (isNBPS(dstFormat)) {
         const int depth = av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1+1;
-        yuv2yuvXNinC_template(lumFilter, lumSrc, lumFilterSize,
+        yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
                               chrFilter, chrSrc, chrFilterSize,
                               alpSrc,
                               dest, uDest, vDest, aDest,

From 895b616146a0d6091b09dab4752191b5234964d0 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 24 May 2011 22:59:11 +0200
Subject: [PATCH 255/830] swscale: don't lose bits on planar >8bit yuv and gray
 input.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale.c             |  1 +
 libswscale/swscale_internal.h    |  4 ++
 libswscale/swscale_template.c    | 69 +++++++++++++++++++++-----------
 libswscale/utils.c               |  2 +-
 tests/ref/lavfi/pixfmts_scale_le | 26 ++++++------
 5 files changed, 65 insertions(+), 37 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index c048180e1d..e4e69cf819 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -60,6 +60,7 @@ untested special converters
 #include "swscale.h"
 #include "swscale_internal.h"
 #include "rgb2rgb.h"
+#include "libavutil/avassert.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/x86_cpu.h"
 #include "libavutil/avutil.h"
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 03c5bf9736..cffb51f61b 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -303,6 +303,10 @@ typedef struct SwsContext {
                    int xInc, const int16_t *filter, const int16_t *filterPos,
                    long filterSize);
 
+    void (*hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW,
+                   int xInc, const int16_t *filter, const int16_t *filterPos,
+                   long filterSize, int shift);
+
     void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
     void (*chrConvertRange)(int16_t *dst, int width); ///< Color range conversion function for chroma planes if needed.
 
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index e53cfc0752..33f9035618 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -2242,6 +2242,34 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
 #endif /* COMPILE_MMX */
 }
 
+static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
+                                    const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
+{
+    int i, j;
+    for (i=0; i<dstW; i++) {
+        int srcPos= filterPos[i];
+        int val=0;
+        for (j=0; j<filterSize; j++) {
+            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
+        }
+        dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
+    }
+}
+
+static inline void RENAME(hScale16X)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
+                                    const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
+{
+    int i, j;
+    for (i=0; i<dstW; i++) {
+        int srcPos= filterPos[i];
+        int val=0;
+        for (j=0; j<filterSize; j++) {
+            val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j];
+        }
+        dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
+    }
+}
+
 //FIXME all pal and rgb srcFormats could do this convertion as well
 //FIXME all scalers more complex than bilinear could do half of this transform
 static void RENAME(chrRangeToJpeg)(int16_t *dst, int width)
@@ -2421,7 +2449,9 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth,
         src= formatConvBuffer;
     }
 
-    if (!c->hyscale_fast) {
+    if (c->hScale16) {
+        c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+    } else if (!c->hyscale_fast) {
         c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
     } else { // fast bilinear upscale / crap downscale
         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
@@ -2569,7 +2599,10 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
         src2= formatConvBuffer+VOFW;
     }
 
-    if (!c->hcscale_fast) {
+    if (c->hScale16) {
+        c->hScale16(dst     , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+        c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+    } else if (!c->hcscale_fast) {
         c->hScale(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
         c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
     } else { // fast bilinear upscale / crap downscale
@@ -2984,18 +3017,20 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         case PIX_FMT_PAL8     :
         case PIX_FMT_BGR4_BYTE:
         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
-        case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
-        case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
+        case PIX_FMT_GRAY16BE :
+        case PIX_FMT_YUV420P9BE:
         case PIX_FMT_YUV422P10BE:
-        case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
-        case PIX_FMT_YUV422P10LE:
-        case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
+        case PIX_FMT_YUV420P10BE:
         case PIX_FMT_YUV420P16BE:
         case PIX_FMT_YUV422P16BE:
-        case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break;
+        case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16) : RENAME(hScale16X); break;
+        case PIX_FMT_GRAY16LE :
+        case PIX_FMT_YUV420P9LE:
+        case PIX_FMT_YUV422P10LE:
+        case PIX_FMT_YUV420P10LE:
         case PIX_FMT_YUV420P16LE:
         case PIX_FMT_YUV422P16LE:
-        case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break;
+        case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16X) : RENAME(hScale16); break;
     }
     if (c->chrSrcHSubSample) {
         switch(srcFormat) {
@@ -3036,23 +3071,11 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     c->lumToYV12 = NULL;
     c->alpToYV12 = NULL;
     switch (srcFormat) {
-    case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
-    case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
-    case PIX_FMT_YUV422P10BE:
-    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
-    case PIX_FMT_YUV422P10LE:
-    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
     case PIX_FMT_YUYV422  :
-    case PIX_FMT_YUV420P16BE:
-    case PIX_FMT_YUV422P16BE:
-    case PIX_FMT_YUV444P16BE:
     case PIX_FMT_GRAY8A   :
-    case PIX_FMT_GRAY16BE : c->lumToYV12 = RENAME(yuy2ToY); break;
+                            c->lumToYV12 = RENAME(yuy2ToY); break;
     case PIX_FMT_UYVY422  :
-    case PIX_FMT_YUV420P16LE:
-    case PIX_FMT_YUV422P16LE:
-    case PIX_FMT_YUV444P16LE:
-    case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break;
+                            c->lumToYV12 = RENAME(uyvyToY); break;
     case PIX_FMT_BGR24    : c->lumToYV12 = RENAME(bgr24ToY); break;
     case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY; break;
     case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY; break;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index ea44190ace..d227243fa3 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -890,7 +890,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
             if (flags&SWS_PRINT_INFO)
                 av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
         }
-        if (usesHFilter) c->canMMX2BeUsed=0;
+        if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat)) c->canMMX2BeUsed=0;
     }
     else
         c->canMMX2BeUsed=0;
diff --git a/tests/ref/lavfi/pixfmts_scale_le b/tests/ref/lavfi/pixfmts_scale_le
index 5c32a363c8..29d1b2c340 100644
--- a/tests/ref/lavfi/pixfmts_scale_le
+++ b/tests/ref/lavfi/pixfmts_scale_le
@@ -9,8 +9,8 @@ bgr565le            3a514a298c6161a071ddf9963c06509d
 bgr8                7f007fa6c153a16e808a9c51605a4016
 bgra                a5e7040f9a80cccd65e5acf2ca09ace5
 gray                d7786a7d9d99ac74230cc045cab5632c
-gray16be            af39ce3a497f6734b157c8b94544f537
-gray16le            7ac1b788bcc472010df7a97e762485e0
+gray16be            5ba22d4802b40ec27e62abb22ad1d1cc
+gray16le            2d5e83aa875a4c3baa6fecf55e3223bf
 monob               88c4c050758e64d120f50c7eff694381
 monow               d31772ebaa877fc2a78565937f7f9673
 nv12                4676d59db43d657dc12841f6bc3ab452
@@ -27,20 +27,20 @@ uyvy422             314bd486277111a95d9369b944fa0400
 yuv410p             7df8f6d69b56a8dcb6c7ee908e5018b5
 yuv411p             1143e7c5cc28fe0922b051b17733bc4c
 yuv420p             fdad2d8df8985e3d17e73c71f713cb14
-yuv420p10be         6d335e75b553da590135cf8bb999610c
-yuv420p10le         d510ddbabefd03ef39ec943fcb51b709
-yuv420p16be         29a0265764530070f5cd3251cc01f66a
-yuv420p16le         6f3a265b084a78baec229238d9f7945f
-yuv420p9be          ec4983b7a949c0472110a7a2c58e278a
-yuv420p9le          c136dce5913a722eee44ab72cff664b2
+yuv420p10be         c143e77e97d2f7d62c3b518857ba9f9b
+yuv420p10le         72d90eccf5c34691ff057dafb7447aa2
+yuv420p16be         01da53e7f4f9882d5189ec1b1165ee05
+yuv420p16le         165f9aaf5332e5d088f44534d8ed2bc9
+yuv420p9be          bb87fddca65d1742412c8d2b1caf96c6
+yuv420p9le          828eec50014a41258a5423c1fe56ac97
 yuv422p             918e37701ee7377d16a8a6c119c56a40
-yuv422p10le         aeb0ef08a883f43429ca9d886d8fc095
-yuv422p16be         ef3e865fc1d0c68977c735323c50af6e
-yuv422p16le         428a9b96214c09cb5a983ce36d6961ff
+yuv422p10le         a10c4a5837547716f13cd61918b145f9
+yuv422p16be         961860aa4f229e09f1249910c687081c
+yuv422p16le         7695ee42c0581279bbe68de81deb7aee
 yuv440p             461503fdb9b90451020aa3b25ddf041c
 yuv444p             81b2eba962d12e8d64f003ac56f6faf2
-yuv444p16be         99a3738c70c8fbdc5a0e4ad4bf50648d
-yuv444p16le         385d0cc5240d62da0871915be5d86f0a
+yuv444p16be         5f924c2b385826106300cecc4ef4d2df
+yuv444p16le         40a55a85858508138b7661c83d95223e
 yuva420p            8673a9131fb47de69788863f93a50eb7
 yuvj420p            30427bd6caf5bda93a173dbebe759e09
 yuvj422p            fc8288f64fd149573f73cf8da05d8e6d

From d9d56036f3351af711261aa6e7a832017c00a34f Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 25 May 2011 02:40:22 +0200
Subject: [PATCH 256/830] swscale: MMX optim of hscale16()

Code is based on the existing 8-bit MMX code.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale_template.c | 143 ++++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 33f9035618..827a32417b 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -2246,6 +2246,149 @@ static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src,
                                     const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
 {
     int i, j;
+#if COMPILE_TEMPLATE_MMX
+    assert(filterSize % 4 == 0 && filterSize>0);
+    if (filterSize==4 && shift<15) { // Always true for upscaling, sometimes for down, too.
+        x86_reg counter= -2*dstW;
+        filter-= counter*2;
+        filterPos-= counter/2;
+        dst-= counter/2;
+        __asm__ volatile(
+            "movd                   %5, %%mm7       \n\t"
+#if defined(PIC)
+            "push            %%"REG_b"              \n\t"
+#endif
+            "push           %%"REG_BP"              \n\t" // we use 7 regs here ...
+            "mov             %%"REG_a", %%"REG_BP"  \n\t"
+            ".p2align                4              \n\t"
+            "1:                                     \n\t"
+            "movzwl   (%2, %%"REG_BP"), %%eax       \n\t"
+            "movzwl  2(%2, %%"REG_BP"), %%ebx       \n\t"
+            "movq  (%1, %%"REG_BP", 4), %%mm1       \n\t"
+            "movq 8(%1, %%"REG_BP", 4), %%mm3       \n\t"
+            "movq      (%3, %%"REG_a", 2), %%mm0    \n\t"
+            "movq      (%3, %%"REG_b", 2), %%mm2    \n\t"
+            "pmaddwd             %%mm1, %%mm0       \n\t"
+            "pmaddwd             %%mm2, %%mm3       \n\t"
+            "movq                %%mm0, %%mm4       \n\t"
+            "punpckldq           %%mm3, %%mm0       \n\t"
+            "punpckhdq           %%mm3, %%mm4       \n\t"
+            "paddd               %%mm4, %%mm0       \n\t"
+            "psrad               %%mm7, %%mm0       \n\t"
+            "packssdw            %%mm0, %%mm0       \n\t"
+            "movd                %%mm0, (%4, %%"REG_BP")    \n\t"
+            "add                    $4, %%"REG_BP"  \n\t"
+            " jnc                   1b              \n\t"
+
+            "pop            %%"REG_BP"              \n\t"
+#if defined(PIC)
+            "pop             %%"REG_b"              \n\t"
+#endif
+            : "+a" (counter)
+            : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift)
+#if !defined(PIC)
+            : "%"REG_b
+#endif
+        );
+    } else if (filterSize==8 && shift<15) {
+        x86_reg counter= -2*dstW;
+        filter-= counter*4;
+        filterPos-= counter/2;
+        dst-= counter/2;
+        __asm__ volatile(
+            "movd                   %5, %%mm7       \n\t"
+#if defined(PIC)
+            "push            %%"REG_b"              \n\t"
+#endif
+            "push            %%"REG_BP"             \n\t" // we use 7 regs here ...
+            "mov              %%"REG_a", %%"REG_BP" \n\t"
+            ".p2align                 4             \n\t"
+            "1:                                     \n\t"
+            "movzwl    (%2, %%"REG_BP"), %%eax      \n\t"
+            "movzwl   2(%2, %%"REG_BP"), %%ebx      \n\t"
+            "movq   (%1, %%"REG_BP", 8), %%mm1      \n\t"
+            "movq 16(%1, %%"REG_BP", 8), %%mm3      \n\t"
+            "movq       (%3, %%"REG_a", 2), %%mm0   \n\t"
+            "movq       (%3, %%"REG_b", 2), %%mm2   \n\t"
+            "pmaddwd              %%mm1, %%mm0      \n\t"
+            "pmaddwd              %%mm2, %%mm3      \n\t"
+
+            "movq  8(%1, %%"REG_BP", 8), %%mm1      \n\t"
+            "movq 24(%1, %%"REG_BP", 8), %%mm5      \n\t"
+            "movq      8(%3, %%"REG_a", 2), %%mm4   \n\t"
+            "movq      8(%3, %%"REG_b", 2), %%mm2   \n\t"
+            "pmaddwd              %%mm1, %%mm4      \n\t"
+            "pmaddwd              %%mm2, %%mm5      \n\t"
+            "paddd                %%mm4, %%mm0      \n\t"
+            "paddd                %%mm5, %%mm3      \n\t"
+            "movq                 %%mm0, %%mm4      \n\t"
+            "punpckldq            %%mm3, %%mm0      \n\t"
+            "punpckhdq            %%mm3, %%mm4      \n\t"
+            "paddd                %%mm4, %%mm0      \n\t"
+            "psrad                %%mm7, %%mm0      \n\t"
+            "packssdw             %%mm0, %%mm0      \n\t"
+            "movd                 %%mm0, (%4, %%"REG_BP")   \n\t"
+            "add                     $4, %%"REG_BP" \n\t"
+            " jnc                    1b             \n\t"
+
+            "pop             %%"REG_BP"             \n\t"
+#if defined(PIC)
+            "pop             %%"REG_b"              \n\t"
+#endif
+            : "+a" (counter)
+            : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift)
+#if !defined(PIC)
+            : "%"REG_b
+#endif
+        );
+    } else if (shift<15){
+        const uint16_t *offset = src+filterSize;
+        x86_reg counter= -2*dstW;
+        //filter-= counter*filterSize/2;
+        filterPos-= counter/2;
+        dst-= counter/2;
+        __asm__ volatile(
+            "movd                   %7, %%mm7       \n\t"
+            ".p2align                  4            \n\t"
+            "1:                                     \n\t"
+            "mov                      %2, %%"REG_c" \n\t"
+            "movzwl      (%%"REG_c", %0), %%eax     \n\t"
+            "movzwl     2(%%"REG_c", %0), %%edx     \n\t"
+            "mov                      %5, %%"REG_c" \n\t"
+            "pxor                  %%mm4, %%mm4     \n\t"
+            "pxor                  %%mm5, %%mm5     \n\t"
+            "2:                                     \n\t"
+            "movq                   (%1), %%mm1     \n\t"
+            "movq               (%1, %6), %%mm3     \n\t"
+            "movq (%%"REG_c", %%"REG_a", 2), %%mm0     \n\t"
+            "movq (%%"REG_c", %%"REG_d", 2), %%mm2     \n\t"
+            "pmaddwd               %%mm1, %%mm0     \n\t"
+            "pmaddwd               %%mm2, %%mm3     \n\t"
+            "paddd                 %%mm3, %%mm5     \n\t"
+            "paddd                 %%mm0, %%mm4     \n\t"
+            "add                      $8, %1        \n\t"
+            "add                      $8, %%"REG_c" \n\t"
+            "cmp                      %4, %%"REG_c" \n\t"
+            " jb                      2b            \n\t"
+            "add                      %6, %1        \n\t"
+            "movq                  %%mm4, %%mm0     \n\t"
+            "punpckldq             %%mm5, %%mm4     \n\t"
+            "punpckhdq             %%mm5, %%mm0     \n\t"
+            "paddd                 %%mm0, %%mm4     \n\t"
+            "psrad                 %%mm7, %%mm4     \n\t"
+            "packssdw              %%mm4, %%mm4     \n\t"
+            "mov                      %3, %%"REG_a" \n\t"
+            "movd                  %%mm4, (%%"REG_a", %0)   \n\t"
+            "add                      $4, %0        \n\t"
+            " jnc                     1b            \n\t"
+
+            : "+r" (counter), "+r" (filter)
+            : "m" (filterPos), "m" (dst), "m"(offset),
+            "m" (src), "r" ((x86_reg)filterSize*2), "m"(shift)
+            : "%"REG_a, "%"REG_c, "%"REG_d
+        );
+    } else
+#endif
     for (i=0; i<dstW; i++) {
         int srcPos= filterPos[i];
         int val=0;

From d1adad3cca407f493c3637e20ecd4f7124e69212 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 25 May 2011 05:23:39 +0200
Subject: [PATCH 257/830] Merge swscale bloatup. This will be cleaned up in the
 next merge.

Authorship / merged commits:
commit f668afd4896ee65683619b6f165dda4cdd46766f
Author: Janne Grunau <janne-libav@jannau.net>
Date:   Fri Apr 15 09:12:34 2011 +0200

    swscale: fix "ISO C90 forbids mixed declarations and code" warning

    only hit with --enable-runtime-cpudetect

commit 7f2ae5c7af374dfe254195a9375974a2ff9395a7
Author: Janne Grunau <janne-libav@jannau.net>
Date:   Fri Apr 15 02:09:44 2011 +0200

    swscale: fix compilation with --enable-runtime-cpudetect

commit b6cad3df822969b31bb93eaf677e52a72416bc97
Author: Janne Grunau <janne-libav@jannau.net>
Date:   Fri Apr 15 00:31:04 2011 +0200

    swscale: correct include path to fix ppc altivec build

commit 6216fc70b74e01a5272085329aa92f5ac797f9cf
Author: Luca Barbato <lu_zero@gentoo.org>
Date:   Thu Apr 14 22:03:45 2011 +0200

    swscale: simplify rgb2rgb templating

    MMX is always built. Drop the ifdefs

commit 33a0421bbaa64f4e9c3d852b7f225ede8dad1388
Author: Josh Allmann <joshua.allmann@gmail.com>
Date:   Wed Apr 13 20:57:32 2011 +0200

    swscale: simplify initialization code

    Simplify the fallthrough case when no accelerated functions
    can be initialized.

commit 735bf1951171a1e0ee4292e84a1b1beac45dd0ab
Author: Josh Allmann <joshua.allmann@gmail.com>
Date:   Wed Apr 13 20:57:31 2011 +0200

    swscale: further cleanup swscale.c

    Move x86-specific constants out of swscale.c

commit 86330b4c9258d5e583c0db033d1e68f46443307c
Author: Luca Barbato <lu_zero@gentoo.org>
Date:   Wed Apr 13 20:57:30 2011 +0200

    swscale: partially move the arch specific code left

    PPC and x86 code is split off from swscale_template.c. Lots of code is
    still duplicated and should be removed later.

    Again uniformize the init system to be more similar to the dsputil one.

    Unset h*scale_fast in the x86 init in order to make the output
    consistent with the previous status. Thanks to Josh for spotting it.

commit c0038328830d7b341c28d7c99b0236a33617fd21
Author: Luca Barbato <lu_zero@gentoo.org>
Date:   Wed Apr 13 20:57:29 2011 +0200

    swscale: move away x86 specific code from rgb2rgb

    Keep only the plain C code in the main rgb2rgb.c and move the x86
    specific optimizations to x86/rgb2rgb.c
    Change the initialization pattern a little so some of it can be
    factorized to behave more like dsputils.

Conflicts:
	libswscale/rgb2rgb.c
	libswscale/swscale_template.c
---
 libswscale/Makefile               |    3 +-
 libswscale/ppc/swscale_template.c |  918 ++++++++++
 libswscale/rgb2rgb.c              |   95 +-
 libswscale/rgb2rgb.h              |    2 +
 libswscale/rgb2rgb_template.c     | 2305 ++---------------------
 libswscale/swscale.c              |   93 +-
 libswscale/swscale_template.c     | 2417 ++----------------------
 libswscale/x86/rgb2rgb.c          |  137 ++
 libswscale/x86/rgb2rgb_template.c | 2646 +++++++++++++++++++++++++++
 libswscale/x86/swscale_template.c | 2825 +++++++++++++++++++++++++++++
 libswscale/x86/swscale_template.h |   79 +
 11 files changed, 6914 insertions(+), 4606 deletions(-)
 create mode 100644 libswscale/ppc/swscale_template.c
 create mode 100644 libswscale/x86/rgb2rgb.c
 create mode 100644 libswscale/x86/rgb2rgb_template.c
 create mode 100644 libswscale/x86/swscale_template.c
 create mode 100644 libswscale/x86/swscale_template.h

diff --git a/libswscale/Makefile b/libswscale/Makefile
index 6976079686..7f8f721e45 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -12,7 +12,8 @@ OBJS-$(ARCH_BFIN)          +=  bfin/internal_bfin.o     \
                                bfin/yuv2rgb_bfin.o
 OBJS-$(CONFIG_MLIB)        +=  mlib/yuv2rgb_mlib.o
 OBJS-$(HAVE_ALTIVEC)       +=  ppc/yuv2rgb_altivec.o
-OBJS-$(HAVE_MMX)           +=  x86/yuv2rgb_mmx.o
+OBJS-$(HAVE_MMX)           +=  x86/rgb2rgb.o            \
+                               x86/yuv2rgb_mmx.o
 OBJS-$(HAVE_VIS)           +=  sparc/yuv2rgb_vis.o
 
 TESTPROGS = colorspace swscale
diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
new file mode 100644
index 0000000000..30bf20d44a
--- /dev/null
+++ b/libswscale/ppc/swscale_template.c
@@ -0,0 +1,918 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#if COMPILE_TEMPLATE_ALTIVEC
+#include "swscale_altivec_template.c"
+#endif
+
+static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+{
+#if COMPILE_TEMPLATE_ALTIVEC
+    yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
+                          chrFilter, chrSrc, chrFilterSize,
+                          dest, uDest, vDest, dstW, chrDstW);
+#else //COMPILE_TEMPLATE_ALTIVEC
+    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
+                chrFilter, chrSrc, chrFilterSize,
+                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
+#endif //!COMPILE_TEMPLATE_ALTIVEC
+}
+
+static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
+{
+    yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
+                 chrFilter, chrSrc, chrFilterSize,
+                 dest, uDest, dstW, chrDstW, dstFormat);
+}
+
+static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+{
+    int i;
+    for (i=0; i<dstW; i++) {
+        int val= (lumSrc[i]+64)>>7;
+
+        if (val&256) {
+            if (val<0) val=0;
+            else       val=255;
+        }
+
+        dest[i]= val;
+    }
+
+    if (uDest)
+        for (i=0; i<chrDstW; i++) {
+            int u=(chrSrc[i       ]+64)>>7;
+            int v=(chrSrc[i + VOFW]+64)>>7;
+
+            if ((u|v)&256) {
+                if (u<0)        u=0;
+                else if (u>255) u=255;
+                if (v<0)        v=0;
+                else if (v>255) v=255;
+            }
+
+            uDest[i]= u;
+            vDest[i]= v;
+        }
+
+    if (CONFIG_SWSCALE_ALPHA && aDest)
+        for (i=0; i<dstW; i++) {
+            int val= (alpSrc[i]+64)>>7;
+            aDest[i]= av_clip_uint8(val);
+        }
+}
+
+
+/**
+ * vertical scale YV12 to RGB
+ */
+static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+#if COMPILE_TEMPLATE_ALTIVEC
+    /* The following list of supported dstFormat values should
+       match what's found in the body of ff_yuv2packedX_altivec() */
+    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
+         (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
+          c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
+          c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
+            ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
+                                   chrFilter, chrSrc, chrFilterSize,
+                                   dest, dstW, dstY);
+    else
+#endif
+        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
+                       chrFilter, chrSrc, chrFilterSize,
+                       alpSrc, dest, dstW, dstY);
+}
+
+/**
+ * vertical bilinear scale YV12 to RGB
+ */
+static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                          const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+    int  yalpha1=4095- yalpha;
+    int uvalpha1=4095-uvalpha;
+    int i;
+
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
+}
+
+/**
+ * YV12 to RGB without scaling or interpolating
+ */
+static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                          const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+    const int yalpha1=0;
+    int i;
+
+    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int yalpha= 4096; //FIXME ...
+
+    if (flags&SWS_FULL_CHR_H_INT) {
+        c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
+        return;
+    }
+
+    if (uvalpha < 2048) {
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+    } else {
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+    }
+}
+
+//FIXME yuy2* can read up to 7 samples too much
+
+static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++)
+        dst[i]= src[2*i];
+}
+
+static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dstU[i]= src1[4*i + 1];
+        dstV[i]= src1[4*i + 3];
+    }
+    assert(src1 == src2);
+}
+
+static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dstU[i]= src1[2*i + 1];
+        dstV[i]= src2[2*i + 1];
+    }
+}
+
+/* This is almost identical to the previous, end exists only because
+ * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
+static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++)
+        dst[i]= src[2*i+1];
+}
+
+static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dstU[i]= src1[4*i + 0];
+        dstV[i]= src1[4*i + 2];
+    }
+    assert(src1 == src2);
+}
+
+static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dstU[i]= src1[2*i];
+        dstV[i]= src2[2*i];
+    }
+}
+
+static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
+                                    const uint8_t *src, long width)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+        dst1[i] = src[2*i+0];
+        dst2[i] = src[2*i+1];
+    }
+}
+
+static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
+                                    const uint8_t *src1, const uint8_t *src2,
+                                    long width, uint32_t *unused)
+{
+    RENAME(nvXXtoUV)(dstU, dstV, src1, width);
+}
+
+static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
+                                    const uint8_t *src1, const uint8_t *src2,
+                                    long width, uint32_t *unused)
+{
+    RENAME(nvXXtoUV)(dstV, dstU, src1, width);
+}
+
+
+static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        int b= src[i*3+0];
+        int g= src[i*3+1];
+        int r= src[i*3+2];
+
+        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+    }
+}
+
+static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        int b= src1[3*i + 0];
+        int g= src1[3*i + 1];
+        int r= src1[3*i + 2];
+
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+    }
+    assert(src1 == src2);
+}
+
+static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        int b= src1[6*i + 0] + src1[6*i + 3];
+        int g= src1[6*i + 1] + src1[6*i + 4];
+        int r= src1[6*i + 2] + src1[6*i + 5];
+
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+    }
+    assert(src1 == src2);
+}
+
+static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        int r= src[i*3+0];
+        int g= src[i*3+1];
+        int b= src[i*3+2];
+
+        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+    }
+}
+
+static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{
+    int i;
+    assert(src1==src2);
+    for (i=0; i<width; i++) {
+        int r= src1[3*i + 0];
+        int g= src1[3*i + 1];
+        int b= src1[3*i + 2];
+
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+    }
+}
+
+static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{
+    int i;
+    assert(src1==src2);
+    for (i=0; i<width; i++) {
+        int r= src1[6*i + 0] + src1[6*i + 3];
+        int g= src1[6*i + 1] + src1[6*i + 4];
+        int b= src1[6*i + 2] + src1[6*i + 5];
+
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+    }
+}
+
+
+// bilinear / bicubic scaling
+static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
+                                  const int16_t *filter, const int16_t *filterPos, long filterSize)
+{
+#if COMPILE_TEMPLATE_ALTIVEC
+    hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
+#else
+    int i;
+    for (i=0; i<dstW; i++) {
+        int j;
+        int srcPos= filterPos[i];
+        int val=0;
+        //printf("filterPos: %d\n", filterPos[i]);
+        for (j=0; j<filterSize; j++) {
+            //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
+            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
+        }
+        //filter += hFilterSize;
+        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
+        //dst[i] = val>>7;
+    }
+#endif /* COMPILE_TEMPLATE_ALTIVEC */
+}
+
+//FIXME all pal and rgb srcFormats could do this convertion as well
+//FIXME all scalers more complex than bilinear could do half of this transform
+static void RENAME(chrRangeToJpeg)(uint16_t *dst, int width)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+        dst[i     ] = (FFMIN(dst[i     ],30775)*4663 - 9289992)>>12; //-264
+        dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
+    }
+}
+static void RENAME(chrRangeFromJpeg)(uint16_t *dst, int width)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+        dst[i     ] = (dst[i     ]*1799 + 4081085)>>11; //1469
+        dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
+    }
+}
+static void RENAME(lumRangeToJpeg)(uint16_t *dst, int width)
+{
+    int i;
+    for (i = 0; i < width; i++)
+        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
+}
+static void RENAME(lumRangeFromJpeg)(uint16_t *dst, int width)
+{
+    int i;
+    for (i = 0; i < width; i++)
+        dst[i] = (dst[i]*14071 + 33561947)>>14;
+}
+
+static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
+                                        long dstWidth, const uint8_t *src, int srcW,
+                                        int xInc)
+{
+    int i;
+    unsigned int xpos=0;
+    for (i=0;i<dstWidth;i++) {
+        register unsigned int xx=xpos>>16;
+        register unsigned int xalpha=(xpos&0xFFFF)>>9;
+        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
+        xpos+=xInc;
+    }
+}
+
+      // *** horizontal scale of one Y line (or alpha line when isAlpha) into dst, with optional input and range conversion
+static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
+                                   const int16_t *hLumFilter,
+                                   const int16_t *hLumFilterPos, int hLumFilterSize,
+                                   uint8_t *formatConvBuffer,
+                                   uint32_t *pal, int isAlpha)
+{
+    void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; // input converter, may be NULL
+    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; // range conversion is never applied to alpha
+
+    src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset; // skip to the wanted component inside the source line
+
+    if (toYV12) {
+        toYV12(formatConvBuffer, src, srcW, pal); // convert into the scratch buffer and scale from there
+        src= formatConvBuffer;
+    }
+
+    if (c->hScale16) { // checked first, so it takes priority over hScale and hyscale_fast
+        c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+    } else if (!c->hyscale_fast) {
+        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
+    } else { // fast bilinear upscale / crap downscale
+        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
+    }
+
+    if (convertRange)
+        convertRange(dst, dstWidth); // in place on the scaled output
+}
+
+static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
+                                        long dstWidth, const uint8_t *src1,
+                                        const uint8_t *src2, int srcW, int xInc) // linear interpolation of two 8-bit chroma rows; c and srcW are not used in this body
+{
+    int i;
+    unsigned int xpos=0; // source position in 16.16 fixed point, advanced by xInc per output sample
+    for (i=0;i<dstWidth;i++) {
+        register unsigned int xx=xpos>>16; // integer source index
+        register unsigned int xalpha=(xpos&0xFFFF)>>9; // top 7 bits of the fraction (0..127)
+        dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); // xalpha^127 equals 127-xalpha for a 7-bit value, so the weights sum to 127
+        dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); // second row is written VOFW entries after the first
+        /* slower
+        dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
+        dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
+        */
+        xpos+=xInc;
+    }
+}
+
+inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
+                                   int srcW, int xInc, const int16_t *hChrFilter,
+                                   const int16_t *hChrFilterPos, int hChrFilterSize,
+                                   uint8_t *formatConvBuffer,
+                                   uint32_t *pal) // horizontal scale of one pair of chroma lines into dst and dst+VOFW
+{
+
+    src1 += c->chrSrcOffset; // same offset is applied to both chroma source pointers
+    src2 += c->chrSrcOffset;
+
+    if (c->chrToYV12) {
+        c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); // convert into the scratch buffer, second row at +VOFW
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+
+    if (c->hScale16) { // checked first, so it takes priority over hScale and hcscale_fast
+        c->hScale16(dst     , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+        c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+    } else if (!c->hcscale_fast) {
+        c->hScale(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+    } else { // fast bilinear upscale / crap downscale
+        c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
+    }
+
+    if (c->chrConvertRange)
+        c->chrConvertRange(dst, dstWidth); // one call; the converters in this file process dst[i] and dst[i+VOFW] together
+}
+
+#define DEBUG_SWSCALE_BUFFERS 0 /* compile-time switch tested by DEBUG_BUFFERS(); 0 disables the logging */
+#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__) /* expands to a bare if statement and uses the identifier c from the call site */
+
+static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
+                           int srcSliceH, uint8_t* dst[], int dstStride[]) // scales one source slice; returns how far dstY advanced in this call
+{
+    /* load a few things into local vars to make the code more readable? and faster */
+    const int srcW= c->srcW;
+    const int dstW= c->dstW;
+    const int dstH= c->dstH;
+    const int chrDstW= c->chrDstW;
+    const int chrSrcW= c->chrSrcW;
+    const int lumXInc= c->lumXInc;
+    const int chrXInc= c->chrXInc;
+    const enum PixelFormat dstFormat= c->dstFormat;
+    const int flags= c->flags;
+    int16_t *vLumFilterPos= c->vLumFilterPos;
+    int16_t *vChrFilterPos= c->vChrFilterPos;
+    int16_t *hLumFilterPos= c->hLumFilterPos;
+    int16_t *hChrFilterPos= c->hChrFilterPos;
+    int16_t *vLumFilter= c->vLumFilter;
+    int16_t *vChrFilter= c->vChrFilter;
+    int16_t *hLumFilter= c->hLumFilter;
+    int16_t *hChrFilter= c->hChrFilter;
+    int32_t *lumMmxFilter= c->lumMmxFilter;
+    int32_t *chrMmxFilter= c->chrMmxFilter;
+    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
+    const int vLumFilterSize= c->vLumFilterSize;
+    const int vChrFilterSize= c->vChrFilterSize;
+    const int hLumFilterSize= c->hLumFilterSize;
+    const int hChrFilterSize= c->hChrFilterSize;
+    int16_t **lumPixBuf= c->lumPixBuf;
+    int16_t **chrPixBuf= c->chrPixBuf;
+    int16_t **alpPixBuf= c->alpPixBuf;
+    const int vLumBufSize= c->vLumBufSize;
+    const int vChrBufSize= c->vChrBufSize;
+    uint8_t *formatConvBuffer= c->formatConvBuffer;
+    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
+    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); // negate-shift-negate: rounds up where >> on a negative int is arithmetic
+    int lastDstY;
+    uint32_t *pal=c->pal_yuv;
+
+    /* vars which will change and which we need to store back in the context */
+    int dstY= c->dstY;
+    int lumBufIndex= c->lumBufIndex;
+    int chrBufIndex= c->chrBufIndex;
+    int lastInLumBuf= c->lastInLumBuf;
+    int lastInChrBuf= c->lastInChrBuf;
+
+    if (isPacked(c->srcFormat)) { // packed input: all four planes alias plane 0 (this writes into the caller's arrays)
+        src[0]=
+        src[1]=
+        src[2]=
+        src[3]= src[0];
+        srcStride[0]=
+        srcStride[1]=
+        srcStride[2]=
+        srcStride[3]= srcStride[0];
+    }
+    srcStride[1]<<= c->vChrDrop; // also modifies the caller's srcStride[] in place
+    srcStride[2]<<= c->vChrDrop;
+
+    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
+                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
+                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
+    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
+                   srcSliceY,    srcSliceH,    dstY,    dstH);
+    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
+                   vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
+
+    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
+        static int warnedAlready=0; //FIXME move this into the context perhaps
+        if (flags & SWS_PRINT_INFO && !warnedAlready) {
+            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
+                   "         ->cannot do aligned memory accesses anymore\n");
+            warnedAlready=1;
+        }
+    }
+
+    /* Note the user might start scaling the picture in the middle so this
+       will not get executed. This is not really intended but works
+       currently, so people might do it. */
+    if (srcSliceY ==0) { // slice starts at source line 0: reset ring-buffer and output position
+        lumBufIndex=-1;
+        chrBufIndex=-1;
+        dstY=0;
+        lastInLumBuf= -1;
+        lastInChrBuf= -1;
+    }
+
+    lastDstY= dstY; // starting output line of this call, used for the return value and fillPlane()
+
+    for (;dstY < dstH; dstY++) {
+        unsigned char *dest =dst[0]+dstStride[0]*dstY;
+        const int chrDstY= dstY>>c->chrDstVSubSample;
+        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
+        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
+        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
+
+        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
+        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
+        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
+        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
+        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
+        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
+        int enough_lines;
+
+        //handle holes (FAST_BILINEAR & weird filters)
+        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
+        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
+        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
+        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
+
+        DEBUG_BUFFERS("dstY: %d\n", dstY);
+        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
+                         firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
+        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
+                         firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
+
+        // Do we have enough lines in this slice to output the dstY line
+        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
+
+        if (!enough_lines) { // not enough input: still buffer everything this slice provides, then break below
+            lastLumSrcY = srcSliceY + srcSliceH - 1;
+            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
+            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
+                                            lastLumSrcY, lastChrSrcY);
+        }
+
+        //Do horizontal scaling
+        while(lastInLumBuf < lastLumSrcY) {
+            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
+            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
+            lumBufIndex++;
+            assert(lumBufIndex < 2*vLumBufSize);
+            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
+            assert(lastInLumBuf + 1 - srcSliceY >= 0);
+            RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
+                            hLumFilter, hLumFilterPos, hLumFilterSize,
+                            formatConvBuffer,
+                            pal, 0);
+            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
+                RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
+                                hLumFilter, hLumFilterPos, hLumFilterSize,
+                                formatConvBuffer,
+                                pal, 1);
+            lastInLumBuf++;
+            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
+                               lumBufIndex,    lastInLumBuf);
+        }
+        while(lastInChrBuf < lastChrSrcY) {
+            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
+            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
+            chrBufIndex++;
+            assert(chrBufIndex < 2*vChrBufSize);
+            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
+            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
+            //FIXME replace parameters through context struct (some at least)
+
+            if (c->needs_hcscale) // when unset the chroma line is only counted, not scaled
+                RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
+                                hChrFilter, hChrFilterPos, hChrFilterSize,
+                                formatConvBuffer,
+                                pal);
+            lastInChrBuf++;
+            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
+                               chrBufIndex,    lastInChrBuf);
+        }
+        //wrap buf index around to stay inside the ring buffer
+        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
+        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
+        if (!enough_lines)
+            break; //we can't output a dstY line so let's try with the next slice
+
+        if (dstY < dstH-2) { // all but the last two output lines go through the function pointers stored in c
+            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+            const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
+                c->yuv2nv12X(c,
+                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                             dest, uDest, dstW, chrDstW, dstFormat);
+            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+                if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
+                    yuv2yuvX16inC(
+                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                                  dstFormat);
+                } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
+                    const int16_t *lumBuf = lumSrcPtr[0];
+                    const int16_t *chrBuf= chrSrcPtr[0];
+                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
+                    c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
+                } else { //General YV12
+                    c->yuv2yuvX(c,
+                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
+                }
+            } else {
+                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
+                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
+                if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
+                    int chrAlpha= vChrFilter[2*dstY+1];
+                    if(flags & SWS_FULL_CHR_H_INT) {
+                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
+                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         alpSrcPtr, dest, dstW, dstY);
+                    } else {
+                        c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
+                                       alpPixBuf ? *alpSrcPtr : NULL,
+                                       dest, dstW, chrAlpha, dstFormat, flags, dstY);
+                    }
+                } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
+                    int lumAlpha= vLumFilter[2*dstY+1];
+                    int chrAlpha= vChrFilter[2*dstY+1];
+                    lumMmxFilter[2]=
+                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
+                    chrMmxFilter[2]=
+                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
+                    if(flags & SWS_FULL_CHR_H_INT) {
+                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
+                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         alpSrcPtr, dest, dstW, dstY);
+                    } else {
+                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
+                                       alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
+                                       dest, dstW, lumAlpha, chrAlpha, dstY);
+                    }
+                } else { //general RGB
+                    if(flags & SWS_FULL_CHR_H_INT) {
+                        yuv2rgbXinC_full(c,
+                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         alpSrcPtr, dest, dstW, dstY);
+                    } else {
+                        c->yuv2packedX(c,
+                                       vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                       vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                       alpSrcPtr, dest, dstW, dstY);
+                    }
+                }
+            }
+        } else { // hmm looks like we can't use MMX here without overwriting this array's tail
+            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+            const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
+                yuv2nv12XinC(
+                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                             dest, uDest, dstW, chrDstW, dstFormat);
+            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+                if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
+                    yuv2yuvX16inC(
+                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                                  dstFormat);
+                } else {
+                    yuv2yuvXinC(
+                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
+                }
+            } else {
+                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
+                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
+                if(flags & SWS_FULL_CHR_H_INT) {
+                    yuv2rgbXinC_full(c,
+                                     vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                     vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                     alpSrcPtr, dest, dstW, dstY);
+                } else {
+                    yuv2packedXinC(c,
+                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                   vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                   alpSrcPtr, dest, dstW, dstY);
+                }
+            }
+        }
+    }
+
+    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf) // alpha output without alpha buffers: presumably fills the rows produced by this call with 255
+        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
+
+    /* store changed local vars back in the context */
+    c->dstY= dstY;
+    c->lumBufIndex= lumBufIndex;
+    c->chrBufIndex= chrBufIndex;
+    c->lastInLumBuf= lastInLumBuf;
+    c->lastInChrBuf= lastInChrBuf;
+
+    return dstY - lastDstY; // number of destination lines completed by this call
+}
+
+static void RENAME(sws_init_swScale)(SwsContext *c) // fills c's function pointers and source offsets from its formats, flags and ranges
+{
+    enum PixelFormat srcFormat = c->srcFormat;
+
+    c->yuv2nv12X    = RENAME(yuv2nv12X   );
+    c->yuv2yuv1     = RENAME(yuv2yuv1    );
+    c->yuv2yuvX     = RENAME(yuv2yuvX    );
+    c->yuv2packed1  = RENAME(yuv2packed1 );
+    c->yuv2packed2  = RENAME(yuv2packed2 );
+    c->yuv2packedX  = RENAME(yuv2packedX );
+
+    c->hScale       = RENAME(hScale      );
+
+    if (c->flags & SWS_FAST_BILINEAR) // fast scalers are only set here, never cleared in this function
+    {
+        c->hyscale_fast = RENAME(hyscale_fast);
+        c->hcscale_fast = RENAME(hcscale_fast);
+    }
+
+    c->chrToYV12 = NULL;
+    switch(srcFormat) { // chroma input converters, plus hScale16 for the listed 9/10/16-bit planar and gray16 formats
+        case PIX_FMT_YUYV422  : c->chrToYV12 = RENAME(yuy2ToUV); break;
+        case PIX_FMT_UYVY422  : c->chrToYV12 = RENAME(uyvyToUV); break;
+        case PIX_FMT_NV12     : c->chrToYV12 = RENAME(nv12ToUV); break;
+        case PIX_FMT_NV21     : c->chrToYV12 = RENAME(nv21ToUV); break;
+        case PIX_FMT_RGB8     :
+        case PIX_FMT_BGR8     :
+        case PIX_FMT_PAL8     :
+        case PIX_FMT_BGR4_BYTE:
+        case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
+        case PIX_FMT_GRAY16BE :
+        case PIX_FMT_YUV420P9BE:
+        case PIX_FMT_YUV422P10BE:
+        case PIX_FMT_YUV420P10BE:
+        case PIX_FMT_YUV420P16BE:
+        case PIX_FMT_YUV422P16BE:
+        case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16) : RENAME(hScale16X); break; // X variant when the format's endianness differs from the host's
+        case PIX_FMT_GRAY16LE :
+        case PIX_FMT_YUV420P9LE:
+        case PIX_FMT_YUV422P10LE:
+        case PIX_FMT_YUV420P10LE:
+        case PIX_FMT_YUV420P16LE:
+        case PIX_FMT_YUV422P16LE:
+        case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16X) : RENAME(hScale16); break;
+    }
+    if (c->chrSrcHSubSample) { // chroma horizontally subsampled: pick the *_half converters
+        switch(srcFormat) {
+        case PIX_FMT_RGB48BE:
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
+        case PIX_FMT_BGR48BE:
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
+        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half;  break; // NOTE(review): RGB32 maps to bgr32* and BGR32 to rgb32* in every table here; looks deliberate, confirm
+        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
+        case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV_half); break;
+        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
+        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
+        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half;  break;
+        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
+        case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV_half); break;
+        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
+        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
+        }
+    } else { // same format list as above, with the non-_half converters
+        switch(srcFormat) {
+        case PIX_FMT_RGB48BE:
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
+        case PIX_FMT_BGR48BE:
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
+        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV;  break;
+        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
+        case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV); break;
+        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
+        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
+        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV;  break;
+        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
+        case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV); break;
+        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
+        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
+        }
+    }
+
+    c->lumToYV12 = NULL;
+    c->alpToYV12 = NULL;
+    switch (srcFormat) { // luma input converters; formats not listed keep NULL
+    case PIX_FMT_YUYV422  :
+    case PIX_FMT_GRAY8A   :
+                            c->lumToYV12 = RENAME(yuy2ToY); break; // GRAY8A shares the YUYV422 luma reader
+    case PIX_FMT_UYVY422  :
+                            c->lumToYV12 = RENAME(uyvyToY); break;
+    case PIX_FMT_BGR24    : c->lumToYV12 = RENAME(bgr24ToY); break;
+    case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY; break;
+    case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY; break;
+    case PIX_FMT_RGB24    : c->lumToYV12 = RENAME(rgb24ToY); break;
+    case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY; break;
+    case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY; break;
+    case PIX_FMT_RGB8     :
+    case PIX_FMT_BGR8     :
+    case PIX_FMT_PAL8     :
+    case PIX_FMT_BGR4_BYTE:
+    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
+    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
+    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
+    case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY;  break;
+    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
+    case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY;  break;
+    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
+    case PIX_FMT_RGB48BE:
+    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
+    case PIX_FMT_BGR48BE:
+    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break;
+    }
+    if (c->alpPixBuf) { // alpha readers are only installed when the context has alpha buffers
+        switch (srcFormat) {
+        case PIX_FMT_RGB32  :
+        case PIX_FMT_RGB32_1:
+        case PIX_FMT_BGR32  :
+        case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
+        case PIX_FMT_GRAY8A : c->alpToYV12 = RENAME(yuy2ToY); break; // same reader as luma; hyscale() adds alpSrcOffset first
+        case PIX_FMT_PAL8   : c->alpToYV12 = palToA; break;
+        }
+    }
+
+    switch (srcFormat) { // byte offsets that hyscale()/hcscale() add to the source pointers; other formats are left untouched here
+    case PIX_FMT_GRAY8A :
+        c->alpSrcOffset = 1;
+        break;
+    case PIX_FMT_RGB32  :
+    case PIX_FMT_BGR32  :
+        c->alpSrcOffset = 3;
+        break;
+    case PIX_FMT_RGB48LE:
+    case PIX_FMT_BGR48LE:
+        c->lumSrcOffset = 1;
+        c->chrSrcOffset = 1;
+        c->alpSrcOffset = 1;
+        break;
+    }
+
+    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { // range converters only when ranges differ and the output is not RGB
+        if (c->srcRange) { // non-zero srcRange selects the FromJpeg pair
+            c->lumConvertRange = RENAME(lumRangeFromJpeg);
+            c->chrConvertRange = RENAME(chrRangeFromJpeg);
+        } else {
+            c->lumConvertRange = RENAME(lumRangeToJpeg);
+            c->chrConvertRange = RENAME(chrRangeToJpeg);
+        }
+    }
+
+    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
+          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
+        c->needs_hcscale = 1; // only ever set here; assumes the field starts at 0 — TODO confirm
+}
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index adc5d59c8c..0e770e2444 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -24,7 +24,6 @@
  */
 #include <inttypes.h>
 #include "config.h"
-#include "libavutil/x86_cpu.h"
 #include "libavutil/bswap.h"
 #include "rgb2rgb.h"
 #include "swscale.h"
@@ -95,45 +94,6 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t
                      long width, long height,
                      long lumStride, long chromStride, long srcStride);
 
-
-#if ARCH_X86
-DECLARE_ASM_CONST(8, uint64_t, mmx_ff)       = 0x00000000000000FFULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_null)     = 0x0000000000000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_one)      = 0xFFFFFFFFFFFFFFFFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32b)      = 0x000000FF000000FFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32g)      = 0x0000FF000000FF00ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32r)      = 0x00FF000000FF0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32a)      = 0xFF000000FF000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32)       = 0x00FFFFFF00FFFFFFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask3216br)   = 0x00F800F800F800F8ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask3216g)    = 0x0000FC000000FC00ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask3215g)    = 0x0000F8000000F800ULL;
-DECLARE_ASM_CONST(8, uint64_t, mul3216)      = 0x2000000420000004ULL;
-DECLARE_ASM_CONST(8, uint64_t, mul3215)      = 0x2000000820000008ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24b)      = 0x00FF0000FF0000FFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24g)      = 0xFF0000FF0000FF00ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24r)      = 0x0000FF0000FF0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24l)      = 0x0000000000FFFFFFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24h)      = 0x0000FFFFFF000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24hh)     = 0xffff000000000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24hhh)    = 0xffffffff00000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24hhhh)   = 0xffffffffffff0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask15b)      = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
-DECLARE_ASM_CONST(8, uint64_t, mask15rg)     = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
-DECLARE_ASM_CONST(8, uint64_t, mask15s)      = 0xFFE0FFE0FFE0FFE0ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask15g)      = 0x03E003E003E003E0ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask15r)      = 0x7C007C007C007C00ULL;
-#define mask16b mask15b
-DECLARE_ASM_CONST(8, uint64_t, mask16g)      = 0x07E007E007E007E0ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask16r)      = 0xF800F800F800F800ULL;
-DECLARE_ASM_CONST(8, uint64_t, red_16mask)   = 0x0000f8000000f800ULL;
-DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL;
-DECLARE_ASM_CONST(8, uint64_t, blue_16mask)  = 0x0000001f0000001fULL;
-DECLARE_ASM_CONST(8, uint64_t, red_15mask)   = 0x00007c0000007c00ULL;
-DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
-DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
-#endif /* ARCH_X86 */
-
 #define RGB2YUV_SHIFT 8
 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
 #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
@@ -145,50 +105,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
 #define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
 #define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
 
-//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
 //plain C versions
-#define COMPILE_TEMPLATE_MMX 0
-#define COMPILE_TEMPLATE_MMX2 0
-#define COMPILE_TEMPLATE_AMD3DNOW 0
-#define COMPILE_TEMPLATE_SSE2 0
-#define RENAME(a) a ## _C
 #include "rgb2rgb_template.c"
 
-#if ARCH_X86
-
-//MMX versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMX
-#define COMPILE_TEMPLATE_MMX 1
-#define RENAME(a) a ## _MMX
-#include "rgb2rgb_template.c"
-
-//MMX2 versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#define COMPILE_TEMPLATE_MMX2 1
-#define RENAME(a) a ## _MMX2
-#include "rgb2rgb_template.c"
-
-//SSE2 versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_SSE2
-#define COMPILE_TEMPLATE_SSE2 1
-#define RENAME(a) a ## _SSE2
-#include "rgb2rgb_template.c"
-
-//3DNOW versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#undef COMPILE_TEMPLATE_SSE2
-#undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMX2 0
-#define COMPILE_TEMPLATE_SSE2 1
-#define COMPILE_TEMPLATE_AMD3DNOW 1
-#define RENAME(a) a ## _3DNOW
-#include "rgb2rgb_template.c"
-
-#endif //ARCH_X86 || ARCH_X86_64
 
 /*
  RGB15->RGB16 original by Strepto/Astral
@@ -199,18 +118,10 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
 
 void sws_rgb2rgb_init(int flags)
 {
-#if HAVE_SSE2 || HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
-    if (flags & SWS_CPU_CAPS_SSE2)
-        rgb2rgb_init_SSE2();
-    else if (flags & SWS_CPU_CAPS_MMX2)
-        rgb2rgb_init_MMX2();
-    else if (flags & SWS_CPU_CAPS_3DNOW)
-        rgb2rgb_init_3DNOW();
-    else if (flags & SWS_CPU_CAPS_MMX)
-        rgb2rgb_init_MMX();
-    else
+    rgb2rgb_init_c(); // always runs first, regardless of flags
+#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
+    rgb2rgb_init_x86(flags); // runs afterwards; presumably replaces entries according to flags — TODO confirm
 #endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
-        rgb2rgb_init_C();
 }
 
 #if LIBSWSCALE_VERSION_MAJOR < 1
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 31e21af127..25bc396413 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -168,4 +168,6 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u
 
 void sws_rgb2rgb_init(int flags);
 
+void rgb2rgb_init_x86(int flags);
+
 #endif /* SWSCALE_RGB2RGB_H */
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 9af0eaa366..272a871256 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -26,85 +26,13 @@
 
 #include <stddef.h>
 
-#undef PREFETCH
-#undef MOVNTQ
-#undef EMMS
-#undef SFENCE
-#undef MMREG_SIZE
-#undef PAVGB
-
-#if COMPILE_TEMPLATE_SSE2
-#define MMREG_SIZE 16
-#else
-#define MMREG_SIZE 8
-#endif
-
-#if COMPILE_TEMPLATE_AMD3DNOW
-#define PREFETCH  "prefetch"
-#define PAVGB     "pavgusb"
-#elif COMPILE_TEMPLATE_MMX2
-#define PREFETCH "prefetchnta"
-#define PAVGB     "pavgb"
-#else
-#define PREFETCH  " # nop"
-#endif
-
-#if COMPILE_TEMPLATE_AMD3DNOW
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-#define EMMS     "femms"
-#else
-#define EMMS     "emms"
-#endif
-
-#if COMPILE_TEMPLATE_MMX2
-#define MOVNTQ "movntq"
-#define SFENCE "sfence"
-#else
-#define MOVNTQ "movq"
-#define SFENCE " # nop"
-#endif
-
-static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     uint8_t *dest = dst;
     const uint8_t *s = src;
     const uint8_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint8_t *mm_end;
-#endif
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 23;
-    __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory");
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "punpckldq    3%1, %%mm0    \n\t"
-            "movd         6%1, %%mm1    \n\t"
-            "punpckldq    9%1, %%mm1    \n\t"
-            "movd        12%1, %%mm2    \n\t"
-            "punpckldq   15%1, %%mm2    \n\t"
-            "movd        18%1, %%mm3    \n\t"
-            "punpckldq   21%1, %%mm3    \n\t"
-            "por        %%mm7, %%mm0    \n\t"
-            "por        %%mm7, %%mm1    \n\t"
-            "por        %%mm7, %%mm2    \n\t"
-            "por        %%mm7, %%mm3    \n\t"
-            MOVNTQ"     %%mm0,   %0     \n\t"
-            MOVNTQ"     %%mm1,  8%0     \n\t"
-            MOVNTQ"     %%mm2, 16%0     \n\t"
-            MOVNTQ"     %%mm3, 24%0"
-            :"=m"(*dest)
-            :"m"(*s)
-            :"memory");
-        dest += 32;
-        s += 24;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
+
     while (s < end) {
 #if HAVE_BIGENDIAN
         /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
@@ -122,76 +50,14 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-#define STORE_BGR24_MMX \
-            "psrlq         $8, %%mm2    \n\t" \
-            "psrlq         $8, %%mm3    \n\t" \
-            "psrlq         $8, %%mm6    \n\t" \
-            "psrlq         $8, %%mm7    \n\t" \
-            "pand "MANGLE(mask24l)", %%mm0\n\t" \
-            "pand "MANGLE(mask24l)", %%mm1\n\t" \
-            "pand "MANGLE(mask24l)", %%mm4\n\t" \
-            "pand "MANGLE(mask24l)", %%mm5\n\t" \
-            "pand "MANGLE(mask24h)", %%mm2\n\t" \
-            "pand "MANGLE(mask24h)", %%mm3\n\t" \
-            "pand "MANGLE(mask24h)", %%mm6\n\t" \
-            "pand "MANGLE(mask24h)", %%mm7\n\t" \
-            "por        %%mm2, %%mm0    \n\t" \
-            "por        %%mm3, %%mm1    \n\t" \
-            "por        %%mm6, %%mm4    \n\t" \
-            "por        %%mm7, %%mm5    \n\t" \
- \
-            "movq       %%mm1, %%mm2    \n\t" \
-            "movq       %%mm4, %%mm3    \n\t" \
-            "psllq        $48, %%mm2    \n\t" \
-            "psllq        $32, %%mm3    \n\t" \
-            "pand "MANGLE(mask24hh)", %%mm2\n\t" \
-            "pand "MANGLE(mask24hhh)", %%mm3\n\t" \
-            "por        %%mm2, %%mm0    \n\t" \
-            "psrlq        $16, %%mm1    \n\t" \
-            "psrlq        $32, %%mm4    \n\t" \
-            "psllq        $16, %%mm5    \n\t" \
-            "por        %%mm3, %%mm1    \n\t" \
-            "pand  "MANGLE(mask24hhhh)", %%mm5\n\t" \
-            "por        %%mm5, %%mm4    \n\t" \
- \
-            MOVNTQ"     %%mm0,   %0     \n\t" \
-            MOVNTQ"     %%mm1,  8%0     \n\t" \
-            MOVNTQ"     %%mm4, 16%0"
-
-
-static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     uint8_t *dest = dst;
     const uint8_t *s = src;
     const uint8_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint8_t *mm_end;
-#endif
+
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 31;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq         8%1, %%mm1    \n\t"
-            "movq        16%1, %%mm4    \n\t"
-            "movq        24%1, %%mm5    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm1, %%mm3    \n\t"
-            "movq       %%mm4, %%mm6    \n\t"
-            "movq       %%mm5, %%mm7    \n\t"
-            STORE_BGR24_MMX
-            :"=m"(*dest)
-            :"m"(*s)
-            :"memory");
-        dest += 24;
-        s += 32;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
+
     while (s < end) {
 #if HAVE_BIGENDIAN
         /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
@@ -215,39 +81,13 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s
  MMX2, 3DNOW optimization by Nick Kurshev
  32-bit C version, and and&add trick by Michael Niedermayer
 */
-static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     register const uint8_t* s=src;
     register uint8_t* d=dst;
     register const uint8_t *end;
     const uint8_t *mm_end;
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
-    __asm__ volatile("movq        %0, %%mm4"::"m"(mask15s));
-    mm_end = end - 15;
-    while (s<mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32%1         \n\t"
-            "movq        %1, %%mm0  \n\t"
-            "movq       8%1, %%mm2  \n\t"
-            "movq     %%mm0, %%mm1  \n\t"
-            "movq     %%mm2, %%mm3  \n\t"
-            "pand     %%mm4, %%mm0  \n\t"
-            "pand     %%mm4, %%mm2  \n\t"
-            "paddw    %%mm1, %%mm0  \n\t"
-            "paddw    %%mm3, %%mm2  \n\t"
-            MOVNTQ"   %%mm0,  %0    \n\t"
-            MOVNTQ"   %%mm2, 8%0"
-            :"=m"(*d)
-            :"m"(*s)
-        );
-        d+=16;
-        s+=16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     mm_end = end - 3;
     while (s < mm_end) {
         register unsigned x= *((const uint32_t *)s);
@@ -261,44 +101,14 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     register const uint8_t* s=src;
     register uint8_t* d=dst;
     register const uint8_t *end;
     const uint8_t *mm_end;
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
-    __asm__ volatile("movq        %0, %%mm7"::"m"(mask15rg));
-    __asm__ volatile("movq        %0, %%mm6"::"m"(mask15b));
-    mm_end = end - 15;
-    while (s<mm_end) {
-        __asm__ volatile(
-            PREFETCH"  32%1         \n\t"
-            "movq        %1, %%mm0  \n\t"
-            "movq       8%1, %%mm2  \n\t"
-            "movq     %%mm0, %%mm1  \n\t"
-            "movq     %%mm2, %%mm3  \n\t"
-            "psrlq       $1, %%mm0  \n\t"
-            "psrlq       $1, %%mm2  \n\t"
-            "pand     %%mm7, %%mm0  \n\t"
-            "pand     %%mm7, %%mm2  \n\t"
-            "pand     %%mm6, %%mm1  \n\t"
-            "pand     %%mm6, %%mm3  \n\t"
-            "por      %%mm1, %%mm0  \n\t"
-            "por      %%mm3, %%mm2  \n\t"
-            MOVNTQ"   %%mm0,  %0    \n\t"
-            MOVNTQ"   %%mm2, 8%0"
-            :"=m"(*d)
-            :"m"(*s)
-        );
-        d+=16;
-        s+=16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
+
     mm_end = end - 3;
     while (s < mm_end) {
         register uint32_t x= *((const uint32_t*)s);
@@ -312,369 +122,61 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint8_t *mm_end;
-#endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    mm_end = end - 15;
-#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
-    __asm__ volatile(
-        "movq           %3, %%mm5   \n\t"
-        "movq           %4, %%mm6   \n\t"
-        "movq           %5, %%mm7   \n\t"
-        "jmp 2f                     \n\t"
-        ".p2align        4          \n\t"
-        "1:                         \n\t"
-        PREFETCH"   32(%1)          \n\t"
-        "movd         (%1), %%mm0   \n\t"
-        "movd        4(%1), %%mm3   \n\t"
-        "punpckldq   8(%1), %%mm0   \n\t"
-        "punpckldq  12(%1), %%mm3   \n\t"
-        "movq        %%mm0, %%mm1   \n\t"
-        "movq        %%mm3, %%mm4   \n\t"
-        "pand        %%mm6, %%mm0   \n\t"
-        "pand        %%mm6, %%mm3   \n\t"
-        "pmaddwd     %%mm7, %%mm0   \n\t"
-        "pmaddwd     %%mm7, %%mm3   \n\t"
-        "pand        %%mm5, %%mm1   \n\t"
-        "pand        %%mm5, %%mm4   \n\t"
-        "por         %%mm1, %%mm0   \n\t"
-        "por         %%mm4, %%mm3   \n\t"
-        "psrld          $5, %%mm0   \n\t"
-        "pslld         $11, %%mm3   \n\t"
-        "por         %%mm3, %%mm0   \n\t"
-        MOVNTQ"      %%mm0, (%0)    \n\t"
-        "add           $16,  %1     \n\t"
-        "add            $8,  %0     \n\t"
-        "2:                         \n\t"
-        "cmp            %2,  %1     \n\t"
-        " jb            1b          \n\t"
-        : "+r" (d), "+r"(s)
-        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
-    );
-#else
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq    %0, %%mm7    \n\t"
-        "movq    %1, %%mm6    \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psrlq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm3    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %2, %%mm3    \n\t"
-            "psrlq         $5, %%mm1    \n\t"
-            "psrlq         $5, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq         $8, %%mm2    \n\t"
-            "psrlq         $8, %%mm5    \n\t"
-            "pand       %%mm7, %%mm2    \n\t"
-            "pand       %%mm7, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 16;
-    }
-#endif
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
+
     while (s < end) {
         register int rgb = *(const uint32_t*)s; s += 4;
         *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
     }
 }
 
-static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint8_t *mm_end;
-#endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    mm_end = end - 15;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psllq         $8, %%mm0    \n\t"
-            "psllq         $8, %%mm3    \n\t"
-            "pand       %%mm7, %%mm0    \n\t"
-            "pand       %%mm7, %%mm3    \n\t"
-            "psrlq         $5, %%mm1    \n\t"
-            "psrlq         $5, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq        $19, %%mm2    \n\t"
-            "psrlq        $19, %%mm5    \n\t"
-            "pand          %2, %%mm2    \n\t"
-            "pand          %2, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         register int rgb = *(const uint32_t*)s; s += 4;
         *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
     }
 }
 
-static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint8_t *mm_end;
-#endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    mm_end = end - 15;
-#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
-    __asm__ volatile(
-        "movq           %3, %%mm5   \n\t"
-        "movq           %4, %%mm6   \n\t"
-        "movq           %5, %%mm7   \n\t"
-        "jmp            2f          \n\t"
-        ".p2align        4          \n\t"
-        "1:                         \n\t"
-        PREFETCH"   32(%1)          \n\t"
-        "movd         (%1), %%mm0   \n\t"
-        "movd        4(%1), %%mm3   \n\t"
-        "punpckldq   8(%1), %%mm0   \n\t"
-        "punpckldq  12(%1), %%mm3   \n\t"
-        "movq        %%mm0, %%mm1   \n\t"
-        "movq        %%mm3, %%mm4   \n\t"
-        "pand        %%mm6, %%mm0   \n\t"
-        "pand        %%mm6, %%mm3   \n\t"
-        "pmaddwd     %%mm7, %%mm0   \n\t"
-        "pmaddwd     %%mm7, %%mm3   \n\t"
-        "pand        %%mm5, %%mm1   \n\t"
-        "pand        %%mm5, %%mm4   \n\t"
-        "por         %%mm1, %%mm0   \n\t"
-        "por         %%mm4, %%mm3   \n\t"
-        "psrld          $6, %%mm0   \n\t"
-        "pslld         $10, %%mm3   \n\t"
-        "por         %%mm3, %%mm0   \n\t"
-        MOVNTQ"      %%mm0, (%0)    \n\t"
-        "add           $16,  %1     \n\t"
-        "add            $8,  %0     \n\t"
-        "2:                         \n\t"
-        "cmp            %2,  %1     \n\t"
-        " jb            1b          \n\t"
-        : "+r" (d), "+r"(s)
-        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
-    );
-#else
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psrlq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm3    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %2, %%mm3    \n\t"
-            "psrlq         $6, %%mm1    \n\t"
-            "psrlq         $6, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq         $9, %%mm2    \n\t"
-            "psrlq         $9, %%mm5    \n\t"
-            "pand       %%mm7, %%mm2    \n\t"
-            "pand       %%mm7, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 16;
-    }
-#endif
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         register int rgb = *(const uint32_t*)s; s += 4;
         *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
     }
 }
 
-static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint8_t *mm_end;
-#endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    mm_end = end - 15;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psllq         $7, %%mm0    \n\t"
-            "psllq         $7, %%mm3    \n\t"
-            "pand       %%mm7, %%mm0    \n\t"
-            "pand       %%mm7, %%mm3    \n\t"
-            "psrlq         $6, %%mm1    \n\t"
-            "psrlq         $6, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq        $19, %%mm2    \n\t"
-            "psrlq        $19, %%mm5    \n\t"
-            "pand          %2, %%mm2    \n\t"
-            "pand          %2, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         register int rgb = *(const uint32_t*)s; s += 4;
         *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
     }
 }
 
-static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint8_t *mm_end;
-#endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq         %0, %%mm7     \n\t"
-        "movq         %1, %%mm6     \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    mm_end = end - 11;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         3%1, %%mm3    \n\t"
-            "punpckldq    6%1, %%mm0    \n\t"
-            "punpckldq    9%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psrlq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm3    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %2, %%mm3    \n\t"
-            "psrlq         $5, %%mm1    \n\t"
-            "psrlq         $5, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq         $8, %%mm2    \n\t"
-            "psrlq         $8, %%mm5    \n\t"
-            "pand       %%mm7, %%mm2    \n\t"
-            "pand       %%mm7, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         const int b = *s++;
         const int g = *s++;
@@ -683,59 +185,12 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint8_t *mm_end;
-#endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq         %0, %%mm7     \n\t"
-        "movq         %1, %%mm6     \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    mm_end = end - 15;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         3%1, %%mm3    \n\t"
-            "punpckldq    6%1, %%mm0    \n\t"
-            "punpckldq    9%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psllq         $8, %%mm0    \n\t"
-            "psllq         $8, %%mm3    \n\t"
-            "pand       %%mm7, %%mm0    \n\t"
-            "pand       %%mm7, %%mm3    \n\t"
-            "psrlq         $5, %%mm1    \n\t"
-            "psrlq         $5, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq        $19, %%mm2    \n\t"
-            "psrlq        $19, %%mm5    \n\t"
-            "pand          %2, %%mm2    \n\t"
-            "pand          %2, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         const int r = *s++;
         const int g = *s++;
@@ -744,59 +199,12 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint8_t *mm_end;
-#endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    mm_end = end - 11;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         3%1, %%mm3    \n\t"
-            "punpckldq    6%1, %%mm0    \n\t"
-            "punpckldq    9%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psrlq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm3    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %2, %%mm3    \n\t"
-            "psrlq         $6, %%mm1    \n\t"
-            "psrlq         $6, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq         $9, %%mm2    \n\t"
-            "psrlq         $9, %%mm5    \n\t"
-            "pand       %%mm7, %%mm2    \n\t"
-            "pand       %%mm7, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         const int b = *s++;
         const int g = *s++;
@@ -805,59 +213,12 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint8_t *mm_end;
-#endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq         %0, %%mm7     \n\t"
-        "movq         %1, %%mm6     \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    mm_end = end - 15;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"   32%1            \n\t"
-            "movd         %1, %%mm0     \n\t"
-            "movd        3%1, %%mm3     \n\t"
-            "punpckldq   6%1, %%mm0     \n\t"
-            "punpckldq   9%1, %%mm3     \n\t"
-            "movq      %%mm0, %%mm1     \n\t"
-            "movq      %%mm0, %%mm2     \n\t"
-            "movq      %%mm3, %%mm4     \n\t"
-            "movq      %%mm3, %%mm5     \n\t"
-            "psllq        $7, %%mm0     \n\t"
-            "psllq        $7, %%mm3     \n\t"
-            "pand      %%mm7, %%mm0     \n\t"
-            "pand      %%mm7, %%mm3     \n\t"
-            "psrlq        $6, %%mm1     \n\t"
-            "psrlq        $6, %%mm4     \n\t"
-            "pand      %%mm6, %%mm1     \n\t"
-            "pand      %%mm6, %%mm4     \n\t"
-            "psrlq       $19, %%mm2     \n\t"
-            "psrlq       $19, %%mm5     \n\t"
-            "pand         %2, %%mm2     \n\t"
-            "pand         %2, %%mm5     \n\t"
-            "por       %%mm1, %%mm0     \n\t"
-            "por       %%mm4, %%mm3     \n\t"
-            "por       %%mm2, %%mm0     \n\t"
-            "por       %%mm5, %%mm3     \n\t"
-            "psllq       $16, %%mm3     \n\t"
-            "por       %%mm3, %%mm0     \n\t"
-            MOVNTQ"    %%mm0, %0        \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         const int r = *s++;
         const int g = *s++;
@@ -887,104 +248,12 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_
        |
    original bits
 */
-static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint16_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint16_t *mm_end;
-#endif
     uint8_t *d = dst;
     const uint16_t *s = (const uint16_t*)src;
     end = s + src_size/2;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 7;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $3, %%mm0    \n\t"
-            "psrlq         $2, %%mm1    \n\t"
-            "psrlq         $7, %%mm2    \n\t"
-            "movq       %%mm0, %%mm3    \n\t"
-            "movq       %%mm1, %%mm4    \n\t"
-            "movq       %%mm2, %%mm5    \n\t"
-            "punpcklwd     %5, %%mm0    \n\t"
-            "punpcklwd     %5, %%mm1    \n\t"
-            "punpcklwd     %5, %%mm2    \n\t"
-            "punpckhwd     %5, %%mm3    \n\t"
-            "punpckhwd     %5, %%mm4    \n\t"
-            "punpckhwd     %5, %%mm5    \n\t"
-            "psllq         $8, %%mm1    \n\t"
-            "psllq        $16, %%mm2    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "psllq         $8, %%mm4    \n\t"
-            "psllq        $16, %%mm5    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-
-            "movq       %%mm0, %%mm6    \n\t"
-            "movq       %%mm3, %%mm7    \n\t"
-
-            "movq         8%1, %%mm0    \n\t"
-            "movq         8%1, %%mm1    \n\t"
-            "movq         8%1, %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $3, %%mm0    \n\t"
-            "psrlq         $2, %%mm1    \n\t"
-            "psrlq         $7, %%mm2    \n\t"
-            "movq       %%mm0, %%mm3    \n\t"
-            "movq       %%mm1, %%mm4    \n\t"
-            "movq       %%mm2, %%mm5    \n\t"
-            "punpcklwd     %5, %%mm0    \n\t"
-            "punpcklwd     %5, %%mm1    \n\t"
-            "punpcklwd     %5, %%mm2    \n\t"
-            "punpckhwd     %5, %%mm3    \n\t"
-            "punpckhwd     %5, %%mm4    \n\t"
-            "punpckhwd     %5, %%mm5    \n\t"
-            "psllq         $8, %%mm1    \n\t"
-            "psllq        $16, %%mm2    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "psllq         $8, %%mm4    \n\t"
-            "psllq        $16, %%mm5    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-
-            :"=m"(*d)
-            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
-            :"memory");
-        /* borrowed 32 to 24 */
-        __asm__ volatile(
-            "movq       %%mm0, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "movq       %%mm6, %%mm0    \n\t"
-            "movq       %%mm7, %%mm1    \n\t"
-
-            "movq       %%mm4, %%mm6    \n\t"
-            "movq       %%mm5, %%mm7    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm1, %%mm3    \n\t"
-
-            STORE_BGR24_MMX
-
-            :"=m"(*d)
-            :"m"(*s)
-            :"memory");
-        d += 24;
-        s += 8;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         register uint16_t bgr;
         bgr = *s++;
@@ -994,103 +263,12 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint16_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint16_t *mm_end;
-#endif
     uint8_t *d = (uint8_t *)dst;
     const uint16_t *s = (const uint16_t *)src;
     end = s + src_size/2;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 7;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm1    \n\t"
-            "psrlq         $8, %%mm2    \n\t"
-            "movq       %%mm0, %%mm3    \n\t"
-            "movq       %%mm1, %%mm4    \n\t"
-            "movq       %%mm2, %%mm5    \n\t"
-            "punpcklwd     %5, %%mm0    \n\t"
-            "punpcklwd     %5, %%mm1    \n\t"
-            "punpcklwd     %5, %%mm2    \n\t"
-            "punpckhwd     %5, %%mm3    \n\t"
-            "punpckhwd     %5, %%mm4    \n\t"
-            "punpckhwd     %5, %%mm5    \n\t"
-            "psllq         $8, %%mm1    \n\t"
-            "psllq        $16, %%mm2    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "psllq         $8, %%mm4    \n\t"
-            "psllq        $16, %%mm5    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-
-            "movq       %%mm0, %%mm6    \n\t"
-            "movq       %%mm3, %%mm7    \n\t"
-
-            "movq         8%1, %%mm0    \n\t"
-            "movq         8%1, %%mm1    \n\t"
-            "movq         8%1, %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm1    \n\t"
-            "psrlq         $8, %%mm2    \n\t"
-            "movq       %%mm0, %%mm3    \n\t"
-            "movq       %%mm1, %%mm4    \n\t"
-            "movq       %%mm2, %%mm5    \n\t"
-            "punpcklwd     %5, %%mm0    \n\t"
-            "punpcklwd     %5, %%mm1    \n\t"
-            "punpcklwd     %5, %%mm2    \n\t"
-            "punpckhwd     %5, %%mm3    \n\t"
-            "punpckhwd     %5, %%mm4    \n\t"
-            "punpckhwd     %5, %%mm5    \n\t"
-            "psllq         $8, %%mm1    \n\t"
-            "psllq        $16, %%mm2    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "psllq         $8, %%mm4    \n\t"
-            "psllq        $16, %%mm5    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            :"=m"(*d)
-            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
-            :"memory");
-        /* borrowed 32 to 24 */
-        __asm__ volatile(
-            "movq       %%mm0, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "movq       %%mm6, %%mm0    \n\t"
-            "movq       %%mm7, %%mm1    \n\t"
-
-            "movq       %%mm4, %%mm6    \n\t"
-            "movq       %%mm5, %%mm7    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm1, %%mm3    \n\t"
-
-            STORE_BGR24_MMX
-
-            :"=m"(*d)
-            :"m"(*s)
-            :"memory");
-        d += 24;
-        s += 8;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         register uint16_t bgr;
         bgr = *s++;
@@ -1119,42 +297,12 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s
     MOVNTQ"     %%mm0,  %0      \n\t"                               \
     MOVNTQ"     %%mm3, 8%0      \n\t"                               \
 
-static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint16_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint16_t *mm_end;
-#endif
     uint8_t *d = dst;
     const uint16_t *s = (const uint16_t *)src;
     end = s + src_size/2;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
-    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t":::"memory");
-    mm_end = end - 3;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $3, %%mm0    \n\t"
-            "psrlq         $2, %%mm1    \n\t"
-            "psrlq         $7, %%mm2    \n\t"
-            PACK_RGB32
-            :"=m"(*d)
-            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
-            :"memory");
-        d += 16;
-        s += 4;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         register uint16_t bgr;
         bgr = *s++;
@@ -1172,42 +320,12 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     const uint16_t *end;
-#if COMPILE_TEMPLATE_MMX
-    const uint16_t *mm_end;
-#endif
     uint8_t *d = dst;
     const uint16_t *s = (const uint16_t*)src;
     end = s + src_size/2;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
-    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t":::"memory");
-    mm_end = end - 3;
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %3, %%mm1    \n\t"
-            "pand          %4, %%mm2    \n\t"
-            "psllq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm1    \n\t"
-            "psrlq         $8, %%mm2    \n\t"
-            PACK_RGB32
-            :"=m"(*d)
-            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
-            :"memory");
-        d += 16;
-        s += 4;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
     while (s < end) {
         register uint16_t bgr;
         bgr = *s++;
@@ -1225,63 +343,11 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
-    x86_reg idx = 15 - src_size;
+    int idx = 15 - src_size;
     const uint8_t *s = src-idx;
     uint8_t *d = dst-idx;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(
-        "test          %0, %0           \n\t"
-        "jns           2f               \n\t"
-        PREFETCH"       (%1, %0)        \n\t"
-        "movq          %3, %%mm7        \n\t"
-        "pxor          %4, %%mm7        \n\t"
-        "movq       %%mm7, %%mm6        \n\t"
-        "pxor          %5, %%mm7        \n\t"
-        ".p2align       4               \n\t"
-        "1:                             \n\t"
-        PREFETCH"     32(%1, %0)        \n\t"
-        "movq           (%1, %0), %%mm0 \n\t"
-        "movq          8(%1, %0), %%mm1 \n\t"
-# if COMPILE_TEMPLATE_MMX2
-        "pshufw      $177, %%mm0, %%mm3 \n\t"
-        "pshufw      $177, %%mm1, %%mm5 \n\t"
-        "pand       %%mm7, %%mm0        \n\t"
-        "pand       %%mm6, %%mm3        \n\t"
-        "pand       %%mm7, %%mm1        \n\t"
-        "pand       %%mm6, %%mm5        \n\t"
-        "por        %%mm3, %%mm0        \n\t"
-        "por        %%mm5, %%mm1        \n\t"
-# else
-        "movq       %%mm0, %%mm2        \n\t"
-        "movq       %%mm1, %%mm4        \n\t"
-        "pand       %%mm7, %%mm0        \n\t"
-        "pand       %%mm6, %%mm2        \n\t"
-        "pand       %%mm7, %%mm1        \n\t"
-        "pand       %%mm6, %%mm4        \n\t"
-        "movq       %%mm2, %%mm3        \n\t"
-        "movq       %%mm4, %%mm5        \n\t"
-        "pslld        $16, %%mm2        \n\t"
-        "psrld        $16, %%mm3        \n\t"
-        "pslld        $16, %%mm4        \n\t"
-        "psrld        $16, %%mm5        \n\t"
-        "por        %%mm2, %%mm0        \n\t"
-        "por        %%mm4, %%mm1        \n\t"
-        "por        %%mm3, %%mm0        \n\t"
-        "por        %%mm5, %%mm1        \n\t"
-# endif
-        MOVNTQ"     %%mm0,  (%2, %0)    \n\t"
-        MOVNTQ"     %%mm1, 8(%2, %0)    \n\t"
-        "add          $16, %0           \n\t"
-        "js            1b               \n\t"
-        SFENCE"                         \n\t"
-        EMMS"                           \n\t"
-        "2:                             \n\t"
-        : "+&r"(idx)
-        : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
-        : "memory");
-#endif
     for (; idx<15; idx+=4) {
         register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
         v &= 0xff00ff;
@@ -1289,66 +355,9 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst,
     }
 }
 
-static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
 {
     unsigned i;
-#if COMPILE_TEMPLATE_MMX
-    x86_reg mmx_size= 23 - src_size;
-    __asm__ volatile (
-        "test             %%"REG_a", %%"REG_a"          \n\t"
-        "jns                     2f                     \n\t"
-        "movq     "MANGLE(mask24r)", %%mm5              \n\t"
-        "movq     "MANGLE(mask24g)", %%mm6              \n\t"
-        "movq     "MANGLE(mask24b)", %%mm7              \n\t"
-        ".p2align                 4                     \n\t"
-        "1:                                             \n\t"
-        PREFETCH" 32(%1, %%"REG_a")                     \n\t"
-        "movq       (%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
-        "movq       (%1, %%"REG_a"), %%mm1              \n\t" // BGR BGR BG
-        "movq      2(%1, %%"REG_a"), %%mm2              \n\t" // R BGR BGR B
-        "psllq                  $16, %%mm0              \n\t" // 00 BGR BGR
-        "pand                 %%mm5, %%mm0              \n\t"
-        "pand                 %%mm6, %%mm1              \n\t"
-        "pand                 %%mm7, %%mm2              \n\t"
-        "por                  %%mm0, %%mm1              \n\t"
-        "por                  %%mm2, %%mm1              \n\t"
-        "movq      6(%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
-        MOVNTQ"               %%mm1,   (%2, %%"REG_a")  \n\t" // RGB RGB RG
-        "movq      8(%1, %%"REG_a"), %%mm1              \n\t" // R BGR BGR B
-        "movq     10(%1, %%"REG_a"), %%mm2              \n\t" // GR BGR BGR
-        "pand                 %%mm7, %%mm0              \n\t"
-        "pand                 %%mm5, %%mm1              \n\t"
-        "pand                 %%mm6, %%mm2              \n\t"
-        "por                  %%mm0, %%mm1              \n\t"
-        "por                  %%mm2, %%mm1              \n\t"
-        "movq     14(%1, %%"REG_a"), %%mm0              \n\t" // R BGR BGR B
-        MOVNTQ"               %%mm1,  8(%2, %%"REG_a")  \n\t" // B RGB RGB R
-        "movq     16(%1, %%"REG_a"), %%mm1              \n\t" // GR BGR BGR
-        "movq     18(%1, %%"REG_a"), %%mm2              \n\t" // BGR BGR BG
-        "pand                 %%mm6, %%mm0              \n\t"
-        "pand                 %%mm7, %%mm1              \n\t"
-        "pand                 %%mm5, %%mm2              \n\t"
-        "por                  %%mm0, %%mm1              \n\t"
-        "por                  %%mm2, %%mm1              \n\t"
-        MOVNTQ"               %%mm1, 16(%2, %%"REG_a")  \n\t"
-        "add                    $24, %%"REG_a"          \n\t"
-        " js                     1b                     \n\t"
-        "2:                                             \n\t"
-        : "+a" (mmx_size)
-        : "r" (src-mmx_size), "r"(dst-mmx_size)
-    );
-
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-
-    if (mmx_size==23) return; //finished, was multiple of 8
-
-    src+= src_size;
-    dst+= src_size;
-    src_size= 23-mmx_size;
-    src-= src_size;
-    dst-= src_size;
-#endif
     for (i=0; i<src_size; i+=3) {
         register uint8_t x;
         x          = src[i + 2];
@@ -1358,98 +367,16 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           long width, long height,
-                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
+                                     const uint8_t *vsrc, uint8_t *dst,
+                                     long width, long height,
+                                     long lumStride, long chromStride,
+                                     long dstStride, long vertLumPerChroma)
 {
     long y;
-    const x86_reg chromWidth= width>>1;
+    const int chromWidth = width >> 1;
     for (y=0; y<height; y++) {
-#if COMPILE_TEMPLATE_MMX
-        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
-        __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            ".p2align                    4              \n\t"
-            "1:                                         \n\t"
-            PREFETCH"    32(%1, %%"REG_a", 2)           \n\t"
-            PREFETCH"    32(%2, %%"REG_a")              \n\t"
-            PREFETCH"    32(%3, %%"REG_a")              \n\t"
-            "movq          (%2, %%"REG_a"), %%mm0       \n\t" // U(0)
-            "movq                    %%mm0, %%mm2       \n\t" // U(0)
-            "movq          (%3, %%"REG_a"), %%mm1       \n\t" // V(0)
-            "punpcklbw               %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
-            "punpckhbw               %%mm1, %%mm2       \n\t" // UVUV UVUV(8)
-
-            "movq        (%1, %%"REG_a",2), %%mm3       \n\t" // Y(0)
-            "movq       8(%1, %%"REG_a",2), %%mm5       \n\t" // Y(8)
-            "movq                    %%mm3, %%mm4       \n\t" // Y(0)
-            "movq                    %%mm5, %%mm6       \n\t" // Y(8)
-            "punpcklbw               %%mm0, %%mm3       \n\t" // YUYV YUYV(0)
-            "punpckhbw               %%mm0, %%mm4       \n\t" // YUYV YUYV(4)
-            "punpcklbw               %%mm2, %%mm5       \n\t" // YUYV YUYV(8)
-            "punpckhbw               %%mm2, %%mm6       \n\t" // YUYV YUYV(12)
-
-            MOVNTQ"                  %%mm3,   (%0, %%"REG_a", 4)    \n\t"
-            MOVNTQ"                  %%mm4,  8(%0, %%"REG_a", 4)    \n\t"
-            MOVNTQ"                  %%mm5, 16(%0, %%"REG_a", 4)    \n\t"
-            MOVNTQ"                  %%mm6, 24(%0, %%"REG_a", 4)    \n\t"
-
-            "add                        $8, %%"REG_a"   \n\t"
-            "cmp                        %4, %%"REG_a"   \n\t"
-            " jb                        1b              \n\t"
-            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
-            : "%"REG_a
-        );
-#else
-
-#if ARCH_ALPHA && HAVE_MVI
-#define pl2yuy2(n)                  \
-    y1 = yc[n];                     \
-    y2 = yc2[n];                    \
-    u = uc[n];                      \
-    v = vc[n];                      \
-    __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1));  \
-    __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2));  \
-    __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u));    \
-    __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v));    \
-    yuv1 = (u << 8) + (v << 24);                \
-    yuv2 = yuv1 + y2;               \
-    yuv1 += y1;                     \
-    qdst[n]  = yuv1;                \
-    qdst2[n] = yuv2;
-
-        int i;
-        uint64_t *qdst = (uint64_t *) dst;
-        uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
-        const uint32_t *yc = (uint32_t *) ysrc;
-        const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
-        const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
-        for (i = 0; i < chromWidth; i += 8) {
-            uint64_t y1, y2, yuv1, yuv2;
-            uint64_t u, v;
-            /* Prefetch */
-            __asm__("ldq $31,64(%0)" :: "r"(yc));
-            __asm__("ldq $31,64(%0)" :: "r"(yc2));
-            __asm__("ldq $31,64(%0)" :: "r"(uc));
-            __asm__("ldq $31,64(%0)" :: "r"(vc));
-
-            pl2yuy2(0);
-            pl2yuy2(1);
-            pl2yuy2(2);
-            pl2yuy2(3);
-
-            yc    += 4;
-            yc2   += 4;
-            uc    += 4;
-            vc    += 4;
-            qdst  += 4;
-            qdst2 += 4;
-        }
-        y++;
-        ysrc += lumStride;
-        dst += dstStride;
-
-#elif HAVE_FAST_64BIT
+#if HAVE_FAST_64BIT
         int i;
         uint64_t *ldst = (uint64_t *) dst;
         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
@@ -1480,7 +407,6 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
             uc++;
             vc++;
         }
-#endif
 #endif
         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
             usrc += chromStride;
@@ -1489,70 +415,32 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
         ysrc += lumStride;
         dst  += dstStride;
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__(EMMS"       \n\t"
-            SFENCE"     \n\t"
-            :::"memory");
-#endif
 }
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
-static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long dstStride)
+static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
+                                const uint8_t *vsrc, uint8_t *dst,
+                                long width, long height,
+                                long lumStride, long chromStride,
+                                long dstStride)
 {
     //FIXME interpolate chroma
-    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
+    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
+                      chromStride, dstStride, 2);
 }
 
-static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           long width, long height,
-                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
+                                     const uint8_t *vsrc, uint8_t *dst,
+                                     long width, long height,
+                                     long lumStride, long chromStride,
+                                     long dstStride, long vertLumPerChroma)
 {
     long y;
-    const x86_reg chromWidth= width>>1;
+    const int chromWidth = width >> 1;
     for (y=0; y<height; y++) {
-#if COMPILE_TEMPLATE_MMX
-        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
-        __asm__ volatile(
-            "xor                %%"REG_a", %%"REG_a"    \n\t"
-            ".p2align                   4               \n\t"
-            "1:                                         \n\t"
-            PREFETCH"   32(%1, %%"REG_a", 2)            \n\t"
-            PREFETCH"   32(%2, %%"REG_a")               \n\t"
-            PREFETCH"   32(%3, %%"REG_a")               \n\t"
-            "movq         (%2, %%"REG_a"), %%mm0        \n\t" // U(0)
-            "movq                   %%mm0, %%mm2        \n\t" // U(0)
-            "movq         (%3, %%"REG_a"), %%mm1        \n\t" // V(0)
-            "punpcklbw              %%mm1, %%mm0        \n\t" // UVUV UVUV(0)
-            "punpckhbw              %%mm1, %%mm2        \n\t" // UVUV UVUV(8)
-
-            "movq       (%1, %%"REG_a",2), %%mm3        \n\t" // Y(0)
-            "movq      8(%1, %%"REG_a",2), %%mm5        \n\t" // Y(8)
-            "movq                   %%mm0, %%mm4        \n\t" // Y(0)
-            "movq                   %%mm2, %%mm6        \n\t" // Y(8)
-            "punpcklbw              %%mm3, %%mm0        \n\t" // YUYV YUYV(0)
-            "punpckhbw              %%mm3, %%mm4        \n\t" // YUYV YUYV(4)
-            "punpcklbw              %%mm5, %%mm2        \n\t" // YUYV YUYV(8)
-            "punpckhbw              %%mm5, %%mm6        \n\t" // YUYV YUYV(12)
-
-            MOVNTQ"                 %%mm0,   (%0, %%"REG_a", 4)     \n\t"
-            MOVNTQ"                 %%mm4,  8(%0, %%"REG_a", 4)     \n\t"
-            MOVNTQ"                 %%mm2, 16(%0, %%"REG_a", 4)     \n\t"
-            MOVNTQ"                 %%mm6, 24(%0, %%"REG_a", 4)     \n\t"
-
-            "add                       $8, %%"REG_a"    \n\t"
-            "cmp                       %4, %%"REG_a"    \n\t"
-            " jb                       1b               \n\t"
-            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
-            : "%"REG_a
-        );
-#else
-//FIXME adapt the Alpha ASM code from yv12->yuy2
-
 #if HAVE_FAST_64BIT
         int i;
         uint64_t *ldst = (uint64_t *) dst;
@@ -1584,7 +472,6 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
             uc++;
             vc++;
         }
-#endif
 #endif
         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
             usrc += chromStride;
@@ -1593,139 +480,62 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
         ysrc += lumStride;
         dst += dstStride;
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__(EMMS"       \n\t"
-            SFENCE"     \n\t"
-            :::"memory");
-#endif
 }
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
-static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long dstStride)
+static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
+                                const uint8_t *vsrc, uint8_t *dst,
+                                long width, long height,
+                                long lumStride, long chromStride,
+                                long dstStride)
 {
     //FIXME interpolate chroma
-    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
+    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
+                      chromStride, dstStride, 2);
 }
 
 /**
  * Width should be a multiple of 16.
  */
-static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         long width, long height,
-                                         long lumStride, long chromStride, long dstStride)
+static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
+                                   const uint8_t *vsrc, uint8_t *dst,
+                                   long width, long height,
+                                   long lumStride, long chromStride,
+                                   long dstStride)
 {
-    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
+    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
+                      chromStride, dstStride, 1);
 }
 
 /**
  * Width should be a multiple of 16.
  */
-static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         long width, long height,
-                                         long lumStride, long chromStride, long dstStride)
+static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
+                                   const uint8_t *vsrc, uint8_t *dst,
+                                   long width, long height,
+                                   long lumStride, long chromStride,
+                                   long dstStride)
 {
-    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
+    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
+                      chromStride, dstStride, 1);
 }
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
-static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
+                                uint8_t *udst, uint8_t *vdst,
+                                long width, long height,
+                                long lumStride, long chromStride,
+                                long srcStride)
 {
     long y;
-    const x86_reg chromWidth= width>>1;
+    const int chromWidth = width >> 1;
     for (y=0; y<height; y+=2) {
-#if COMPILE_TEMPLATE_MMX
-        __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            "pcmpeqw                 %%mm7, %%mm7       \n\t"
-            "psrlw                      $8, %%mm7       \n\t" // FF,00,FF,00...
-            ".p2align                    4              \n\t"
-            "1:                \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
-            "movq                    %%mm0, %%mm2       \n\t" // YUYV YUYV(0)
-            "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(4)
-            "psrlw                      $8, %%mm0       \n\t" // U0V0 U0V0(0)
-            "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(4)
-            "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(0)
-            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(4)
-            "packuswb                %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
-            "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(0)
-
-            MOVNTQ"                  %%mm2, (%1, %%"REG_a", 2)  \n\t"
-
-            "movq     16(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12)
-            "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(8)
-            "movq                    %%mm2, %%mm4       \n\t" // YUYV YUYV(12)
-            "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(8)
-            "psrlw                      $8, %%mm2       \n\t" // U0V0 U0V0(12)
-            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(8)
-            "pand                    %%mm7, %%mm4       \n\t" // Y0Y0 Y0Y0(12)
-            "packuswb                %%mm2, %%mm1       \n\t" // UVUV UVUV(8)
-            "packuswb                %%mm4, %%mm3       \n\t" // YYYY YYYY(8)
-
-            MOVNTQ"                  %%mm3, 8(%1, %%"REG_a", 2) \n\t"
-
-            "movq                    %%mm0, %%mm2       \n\t" // UVUV UVUV(0)
-            "movq                    %%mm1, %%mm3       \n\t" // UVUV UVUV(8)
-            "psrlw                      $8, %%mm0       \n\t" // V0V0 V0V0(0)
-            "psrlw                      $8, %%mm1       \n\t" // V0V0 V0V0(8)
-            "pand                    %%mm7, %%mm2       \n\t" // U0U0 U0U0(0)
-            "pand                    %%mm7, %%mm3       \n\t" // U0U0 U0U0(8)
-            "packuswb                %%mm1, %%mm0       \n\t" // VVVV VVVV(0)
-            "packuswb                %%mm3, %%mm2       \n\t" // UUUU UUUU(0)
-
-            MOVNTQ"                  %%mm0, (%3, %%"REG_a")     \n\t"
-            MOVNTQ"                  %%mm2, (%2, %%"REG_a")     \n\t"
-
-            "add                        $8, %%"REG_a"   \n\t"
-            "cmp                        %4, %%"REG_a"   \n\t"
-            " jb                        1b              \n\t"
-            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
-        );
-
-        ydst += lumStride;
-        src  += srcStride;
-
-        __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            ".p2align                    4              \n\t"
-            "1:                                         \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
-            "movq     16(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12)
-            "pand                    %%mm7, %%mm0       \n\t" // Y0Y0 Y0Y0(0)
-            "pand                    %%mm7, %%mm1       \n\t" // Y0Y0 Y0Y0(4)
-            "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(8)
-            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(12)
-            "packuswb                %%mm1, %%mm0       \n\t" // YYYY YYYY(0)
-            "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(8)
-
-            MOVNTQ"                  %%mm0,  (%1, %%"REG_a", 2) \n\t"
-            MOVNTQ"                  %%mm2, 8(%1, %%"REG_a", 2) \n\t"
-
-            "add                        $8, %%"REG_a"   \n\t"
-            "cmp                        %4, %%"REG_a"   \n\t"
-            " jb                        1b              \n\t"
-
-            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
-        );
-#else
         long i;
         for (i=0; i<chromWidth; i++) {
             ydst[2*i+0]     = src[4*i+0];
@@ -1740,20 +550,15 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
             ydst[2*i+0]     = src[4*i+0];
             ydst[2*i+1]     = src[4*i+2];
         }
-#endif
         udst += chromStride;
         vdst += chromStride;
         ydst += lumStride;
         src  += srcStride;
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(EMMS"       \n\t"
-                     SFENCE"     \n\t"
-                     :::"memory");
-#endif
 }
 
-static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
+static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
+                              long srcHeight, long srcStride, long dstStride)
 {
     long x,y;
 
@@ -1769,66 +574,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
     dst+= dstStride;
 
     for (y=1; y<srcHeight; y++) {
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
-        const x86_reg mmxSize= srcWidth&~15;
-        __asm__ volatile(
-            "mov           %4, %%"REG_a"            \n\t"
-            "movq        "MANGLE(mmx_ff)", %%mm0    \n\t"
-            "movq         (%0, %%"REG_a"), %%mm4    \n\t"
-            "movq                   %%mm4, %%mm2    \n\t"
-            "psllq                     $8, %%mm4    \n\t"
-            "pand                   %%mm0, %%mm2    \n\t"
-            "por                    %%mm2, %%mm4    \n\t"
-            "movq         (%1, %%"REG_a"), %%mm5    \n\t"
-            "movq                   %%mm5, %%mm3    \n\t"
-            "psllq                     $8, %%mm5    \n\t"
-            "pand                   %%mm0, %%mm3    \n\t"
-            "por                    %%mm3, %%mm5    \n\t"
-            "1:                                     \n\t"
-            "movq         (%0, %%"REG_a"), %%mm0    \n\t"
-            "movq         (%1, %%"REG_a"), %%mm1    \n\t"
-            "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
-            "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
-            PAVGB"                  %%mm0, %%mm5    \n\t"
-            PAVGB"                  %%mm0, %%mm3    \n\t"
-            PAVGB"                  %%mm0, %%mm5    \n\t"
-            PAVGB"                  %%mm0, %%mm3    \n\t"
-            PAVGB"                  %%mm1, %%mm4    \n\t"
-            PAVGB"                  %%mm1, %%mm2    \n\t"
-            PAVGB"                  %%mm1, %%mm4    \n\t"
-            PAVGB"                  %%mm1, %%mm2    \n\t"
-            "movq                   %%mm5, %%mm7    \n\t"
-            "movq                   %%mm4, %%mm6    \n\t"
-            "punpcklbw              %%mm3, %%mm5    \n\t"
-            "punpckhbw              %%mm3, %%mm7    \n\t"
-            "punpcklbw              %%mm2, %%mm4    \n\t"
-            "punpckhbw              %%mm2, %%mm6    \n\t"
-#if 1
-            MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
-            MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
-            MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
-            MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-#else
-            "movq                   %%mm5,  (%2, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm4,  (%3, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-#endif
-            "add                       $8, %%"REG_a"            \n\t"
-            "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
-            "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
-            " js                       1b                       \n\t"
-            :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
-               "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
-               "g" (-mmxSize)
-            : "%"REG_a
-        );
-#else
-        const x86_reg mmxSize=1;
+        const int mmxSize = 1;
 
         dst[0        ]= (3*src[0] +   src[srcStride])>>2;
         dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
-#endif
 
         for (x=mmxSize-1; x<srcWidth-1; x++) {
             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
@@ -1858,12 +607,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
         dst[2*x+1]= src[x];
     }
 #endif
-
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(EMMS"       \n\t"
-                     SFENCE"     \n\t"
-                     :::"memory");
-#endif
 }
 
 /**
@@ -1872,96 +615,15 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
  * Chrominance data is only taken from every second line, others are ignored.
  * FIXME: Write HQ version.
  */
-static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
+                                uint8_t *udst, uint8_t *vdst,
+                                long width, long height,
+                                long lumStride, long chromStride,
+                                long srcStride)
 {
     long y;
-    const x86_reg chromWidth= width>>1;
+    const int chromWidth = width >> 1;
     for (y=0; y<height; y+=2) {
-#if COMPILE_TEMPLATE_MMX
-        __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            "pcmpeqw             %%mm7, %%mm7   \n\t"
-            "psrlw                  $8, %%mm7   \n\t" // FF,00,FF,00...
-            ".p2align                4          \n\t"
-            "1:                                 \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
-            "movq                %%mm0, %%mm2   \n\t" // UYVY UYVY(0)
-            "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(4)
-            "pand                %%mm7, %%mm0   \n\t" // U0V0 U0V0(0)
-            "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(4)
-            "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(0)
-            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(4)
-            "packuswb            %%mm1, %%mm0   \n\t" // UVUV UVUV(0)
-            "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(0)
-
-            MOVNTQ"              %%mm2,  (%1, %%"REG_a", 2) \n\t"
-
-            "movq     16(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12)
-            "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(8)
-            "movq                %%mm2, %%mm4   \n\t" // UYVY UYVY(12)
-            "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(8)
-            "pand                %%mm7, %%mm2   \n\t" // U0V0 U0V0(12)
-            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(8)
-            "psrlw                  $8, %%mm4   \n\t" // Y0Y0 Y0Y0(12)
-            "packuswb            %%mm2, %%mm1   \n\t" // UVUV UVUV(8)
-            "packuswb            %%mm4, %%mm3   \n\t" // YYYY YYYY(8)
-
-            MOVNTQ"              %%mm3, 8(%1, %%"REG_a", 2) \n\t"
-
-            "movq                %%mm0, %%mm2   \n\t" // UVUV UVUV(0)
-            "movq                %%mm1, %%mm3   \n\t" // UVUV UVUV(8)
-            "psrlw                  $8, %%mm0   \n\t" // V0V0 V0V0(0)
-            "psrlw                  $8, %%mm1   \n\t" // V0V0 V0V0(8)
-            "pand                %%mm7, %%mm2   \n\t" // U0U0 U0U0(0)
-            "pand                %%mm7, %%mm3   \n\t" // U0U0 U0U0(8)
-            "packuswb            %%mm1, %%mm0   \n\t" // VVVV VVVV(0)
-            "packuswb            %%mm3, %%mm2   \n\t" // UUUU UUUU(0)
-
-            MOVNTQ"              %%mm0, (%3, %%"REG_a") \n\t"
-            MOVNTQ"              %%mm2, (%2, %%"REG_a") \n\t"
-
-            "add                    $8, %%"REG_a"   \n\t"
-            "cmp                    %4, %%"REG_a"   \n\t"
-            " jb                    1b          \n\t"
-            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
-        );
-
-        ydst += lumStride;
-        src  += srcStride;
-
-        __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            ".p2align                    4          \n\t"
-            "1:                                 \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // YUYV YUYV(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // YUYV YUYV(4)
-            "movq     16(%0, %%"REG_a", 4), %%mm2   \n\t" // YUYV YUYV(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm3   \n\t" // YUYV YUYV(12)
-            "psrlw                  $8, %%mm0   \n\t" // Y0Y0 Y0Y0(0)
-            "psrlw                  $8, %%mm1   \n\t" // Y0Y0 Y0Y0(4)
-            "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(8)
-            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(12)
-            "packuswb            %%mm1, %%mm0   \n\t" // YYYY YYYY(0)
-            "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(8)
-
-            MOVNTQ"              %%mm0,  (%1, %%"REG_a", 2) \n\t"
-            MOVNTQ"              %%mm2, 8(%1, %%"REG_a", 2) \n\t"
-
-            "add                    $8, %%"REG_a"   \n\t"
-            "cmp                    %4, %%"REG_a"   \n\t"
-            " jb                    1b          \n\t"
-
-            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
-        );
-#else
         long i;
         for (i=0; i<chromWidth; i++) {
             udst[i]     = src[4*i+0];
@@ -1976,17 +638,11 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
             ydst[2*i+0] = src[4*i+1];
             ydst[2*i+1] = src[4*i+3];
         }
-#endif
         udst += chromStride;
         vdst += chromStride;
         ydst += lumStride;
         src  += srcStride;
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(EMMS"       \n\t"
-                     SFENCE"     \n\t"
-                     :::"memory");
-#endif
 }
 
 /**
@@ -1996,249 +652,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
  * others are ignored in the C version.
  * FIXME: Write HQ version.
  */
-static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                       long width, long height,
-                                       long lumStride, long chromStride, long srcStride)
+static inline void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst,
+                                 uint8_t *udst, uint8_t *vdst,
+                                 long width, long height,
+                                 long lumStride, long chromStride,
+                                 long srcStride)
 {
     long y;
-    const x86_reg chromWidth= width>>1;
-#if COMPILE_TEMPLATE_MMX
-    for (y=0; y<height-2; y+=2) {
-        long i;
-        for (i=0; i<2; i++) {
-            __asm__ volatile(
-                "mov                        %2, %%"REG_a"   \n\t"
-                "movq  "MANGLE(ff_bgr2YCoeff)", %%mm6       \n\t"
-                "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
-                "pxor                    %%mm7, %%mm7       \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
-                ".p2align                    4              \n\t"
-                "1:                                         \n\t"
-                PREFETCH"    64(%0, %%"REG_d")              \n\t"
-                "movd          (%0, %%"REG_d"), %%mm0       \n\t"
-                "movd         3(%0, %%"REG_d"), %%mm1       \n\t"
-                "punpcklbw               %%mm7, %%mm0       \n\t"
-                "punpcklbw               %%mm7, %%mm1       \n\t"
-                "movd         6(%0, %%"REG_d"), %%mm2       \n\t"
-                "movd         9(%0, %%"REG_d"), %%mm3       \n\t"
-                "punpcklbw               %%mm7, %%mm2       \n\t"
-                "punpcklbw               %%mm7, %%mm3       \n\t"
-                "pmaddwd                 %%mm6, %%mm0       \n\t"
-                "pmaddwd                 %%mm6, %%mm1       \n\t"
-                "pmaddwd                 %%mm6, %%mm2       \n\t"
-                "pmaddwd                 %%mm6, %%mm3       \n\t"
-#ifndef FAST_BGR2YV12
-                "psrad                      $8, %%mm0       \n\t"
-                "psrad                      $8, %%mm1       \n\t"
-                "psrad                      $8, %%mm2       \n\t"
-                "psrad                      $8, %%mm3       \n\t"
-#endif
-                "packssdw                %%mm1, %%mm0       \n\t"
-                "packssdw                %%mm3, %%mm2       \n\t"
-                "pmaddwd                 %%mm5, %%mm0       \n\t"
-                "pmaddwd                 %%mm5, %%mm2       \n\t"
-                "packssdw                %%mm2, %%mm0       \n\t"
-                "psraw                      $7, %%mm0       \n\t"
-
-                "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
-                "movd        15(%0, %%"REG_d"), %%mm1       \n\t"
-                "punpcklbw               %%mm7, %%mm4       \n\t"
-                "punpcklbw               %%mm7, %%mm1       \n\t"
-                "movd        18(%0, %%"REG_d"), %%mm2       \n\t"
-                "movd        21(%0, %%"REG_d"), %%mm3       \n\t"
-                "punpcklbw               %%mm7, %%mm2       \n\t"
-                "punpcklbw               %%mm7, %%mm3       \n\t"
-                "pmaddwd                 %%mm6, %%mm4       \n\t"
-                "pmaddwd                 %%mm6, %%mm1       \n\t"
-                "pmaddwd                 %%mm6, %%mm2       \n\t"
-                "pmaddwd                 %%mm6, %%mm3       \n\t"
-#ifndef FAST_BGR2YV12
-                "psrad                      $8, %%mm4       \n\t"
-                "psrad                      $8, %%mm1       \n\t"
-                "psrad                      $8, %%mm2       \n\t"
-                "psrad                      $8, %%mm3       \n\t"
-#endif
-                "packssdw                %%mm1, %%mm4       \n\t"
-                "packssdw                %%mm3, %%mm2       \n\t"
-                "pmaddwd                 %%mm5, %%mm4       \n\t"
-                "pmaddwd                 %%mm5, %%mm2       \n\t"
-                "add                       $24, %%"REG_d"   \n\t"
-                "packssdw                %%mm2, %%mm4       \n\t"
-                "psraw                      $7, %%mm4       \n\t"
-
-                "packuswb                %%mm4, %%mm0       \n\t"
-                "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t"
-
-                MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
-                "add                        $8,      %%"REG_a"  \n\t"
-                " js                        1b                  \n\t"
-                : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
-                : "%"REG_a, "%"REG_d
-            );
-            ydst += lumStride;
-            src  += srcStride;
-        }
-        src -= srcStride*2;
-        __asm__ volatile(
-            "mov                        %4, %%"REG_a"   \n\t"
-            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
-            "movq  "MANGLE(ff_bgr2UCoeff)", %%mm6       \n\t"
-            "pxor                    %%mm7, %%mm7       \n\t"
-            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
-            "add                 %%"REG_d", %%"REG_d"   \n\t"
-            ".p2align                    4              \n\t"
-            "1:                                         \n\t"
-            PREFETCH"    64(%0, %%"REG_d")              \n\t"
-            PREFETCH"    64(%1, %%"REG_d")              \n\t"
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
-            "movq          (%0, %%"REG_d"), %%mm0       \n\t"
-            "movq          (%1, %%"REG_d"), %%mm1       \n\t"
-            "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
-            "movq         6(%1, %%"REG_d"), %%mm3       \n\t"
-            PAVGB"                   %%mm1, %%mm0       \n\t"
-            PAVGB"                   %%mm3, %%mm2       \n\t"
-            "movq                    %%mm0, %%mm1       \n\t"
-            "movq                    %%mm2, %%mm3       \n\t"
-            "psrlq                     $24, %%mm0       \n\t"
-            "psrlq                     $24, %%mm2       \n\t"
-            PAVGB"                   %%mm1, %%mm0       \n\t"
-            PAVGB"                   %%mm3, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm0       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-#else
-            "movd          (%0, %%"REG_d"), %%mm0       \n\t"
-            "movd          (%1, %%"REG_d"), %%mm1       \n\t"
-            "movd         3(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd         3(%1, %%"REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm0       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm0       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm2, %%mm0       \n\t"
-            "movd         6(%0, %%"REG_d"), %%mm4       \n\t"
-            "movd         6(%1, %%"REG_d"), %%mm1       \n\t"
-            "movd         9(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd         9(%1, %%"REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm4       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm4       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm4, %%mm2       \n\t"
-            "psrlw                      $2, %%mm0       \n\t"
-            "psrlw                      $2, %%mm2       \n\t"
-#endif
-            "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
-            "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
-
-            "pmaddwd                 %%mm0, %%mm1       \n\t"
-            "pmaddwd                 %%mm2, %%mm3       \n\t"
-            "pmaddwd                 %%mm6, %%mm0       \n\t"
-            "pmaddwd                 %%mm6, %%mm2       \n\t"
-#ifndef FAST_BGR2YV12
-            "psrad                      $8, %%mm0       \n\t"
-            "psrad                      $8, %%mm1       \n\t"
-            "psrad                      $8, %%mm2       \n\t"
-            "psrad                      $8, %%mm3       \n\t"
-#endif
-            "packssdw                %%mm2, %%mm0       \n\t"
-            "packssdw                %%mm3, %%mm1       \n\t"
-            "pmaddwd                 %%mm5, %%mm0       \n\t"
-            "pmaddwd                 %%mm5, %%mm1       \n\t"
-            "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
-            "psraw                      $7, %%mm0       \n\t"
-
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
-            "movq        12(%0, %%"REG_d"), %%mm4       \n\t"
-            "movq        12(%1, %%"REG_d"), %%mm1       \n\t"
-            "movq        18(%0, %%"REG_d"), %%mm2       \n\t"
-            "movq        18(%1, %%"REG_d"), %%mm3       \n\t"
-            PAVGB"                   %%mm1, %%mm4       \n\t"
-            PAVGB"                   %%mm3, %%mm2       \n\t"
-            "movq                    %%mm4, %%mm1       \n\t"
-            "movq                    %%mm2, %%mm3       \n\t"
-            "psrlq                     $24, %%mm4       \n\t"
-            "psrlq                     $24, %%mm2       \n\t"
-            PAVGB"                   %%mm1, %%mm4       \n\t"
-            PAVGB"                   %%mm3, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm4       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-#else
-            "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
-            "movd        12(%1, %%"REG_d"), %%mm1       \n\t"
-            "movd        15(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd        15(%1, %%"REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm4       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm4       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm2, %%mm4       \n\t"
-            "movd        18(%0, %%"REG_d"), %%mm5       \n\t"
-            "movd        18(%1, %%"REG_d"), %%mm1       \n\t"
-            "movd        21(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd        21(%1, %%"REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm5       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm5       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm5, %%mm2       \n\t"
-            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
-            "psrlw                      $2, %%mm4       \n\t"
-            "psrlw                      $2, %%mm2       \n\t"
-#endif
-            "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
-            "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
-
-            "pmaddwd                 %%mm4, %%mm1       \n\t"
-            "pmaddwd                 %%mm2, %%mm3       \n\t"
-            "pmaddwd                 %%mm6, %%mm4       \n\t"
-            "pmaddwd                 %%mm6, %%mm2       \n\t"
-#ifndef FAST_BGR2YV12
-            "psrad                      $8, %%mm4       \n\t"
-            "psrad                      $8, %%mm1       \n\t"
-            "psrad                      $8, %%mm2       \n\t"
-            "psrad                      $8, %%mm3       \n\t"
-#endif
-            "packssdw                %%mm2, %%mm4       \n\t"
-            "packssdw                %%mm3, %%mm1       \n\t"
-            "pmaddwd                 %%mm5, %%mm4       \n\t"
-            "pmaddwd                 %%mm5, %%mm1       \n\t"
-            "add                       $24, %%"REG_d"   \n\t"
-            "packssdw                %%mm1, %%mm4       \n\t" // V3 V2 U3 U2
-            "psraw                      $7, %%mm4       \n\t"
-
-            "movq                    %%mm0, %%mm1           \n\t"
-            "punpckldq               %%mm4, %%mm0           \n\t"
-            "punpckhdq               %%mm4, %%mm1           \n\t"
-            "packsswb                %%mm1, %%mm0           \n\t"
-            "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0         \n\t"
-            "movd                    %%mm0, (%2, %%"REG_a") \n\t"
-            "punpckhdq               %%mm0, %%mm0           \n\t"
-            "movd                    %%mm0, (%3, %%"REG_a") \n\t"
-            "add                        $4, %%"REG_a"       \n\t"
-            " js                        1b                  \n\t"
-            : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
-            : "%"REG_a, "%"REG_d
-        );
-
-        udst += chromStride;
-        vdst += chromStride;
-        src  += srcStride*2;
-    }
-
-    __asm__ volatile(EMMS"       \n\t"
-                     SFENCE"     \n\t"
-                     :::"memory");
-#else
+    const int chromWidth = width >> 1;
     y=0;
-#endif
     for (; y<height; y+=2) {
         long i;
         for (i=0; i<chromWidth; i++) {
@@ -2290,194 +712,55 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
     }
 }
 
-static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
-                             long width, long height, long src1Stride,
-                             long src2Stride, long dstStride)
+static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
+                              uint8_t *dest, long width,
+                              long height, long src1Stride,
+                              long src2Stride, long dstStride)
 {
     long h;
 
     for (h=0; h < height; h++) {
         long w;
-
-#if COMPILE_TEMPLATE_MMX
-#if COMPILE_TEMPLATE_SSE2
-        __asm__(
-            "xor              %%"REG_a", %%"REG_a"  \n\t"
-            "1:                                     \n\t"
-            PREFETCH" 64(%1, %%"REG_a")             \n\t"
-            PREFETCH" 64(%2, %%"REG_a")             \n\t"
-            "movdqa     (%1, %%"REG_a"), %%xmm0     \n\t"
-            "movdqa     (%1, %%"REG_a"), %%xmm1     \n\t"
-            "movdqa     (%2, %%"REG_a"), %%xmm2     \n\t"
-            "punpcklbw           %%xmm2, %%xmm0     \n\t"
-            "punpckhbw           %%xmm2, %%xmm1     \n\t"
-            "movntdq             %%xmm0,   (%0, %%"REG_a", 2)   \n\t"
-            "movntdq             %%xmm1, 16(%0, %%"REG_a", 2)   \n\t"
-            "add                    $16, %%"REG_a"  \n\t"
-            "cmp                     %3, %%"REG_a"  \n\t"
-            " jb                     1b             \n\t"
-            ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
-            : "memory", "%"REG_a""
-        );
-#else
-        __asm__(
-            "xor %%"REG_a", %%"REG_a"               \n\t"
-            "1:                                     \n\t"
-            PREFETCH" 64(%1, %%"REG_a")             \n\t"
-            PREFETCH" 64(%2, %%"REG_a")             \n\t"
-            "movq       (%1, %%"REG_a"), %%mm0      \n\t"
-            "movq      8(%1, %%"REG_a"), %%mm2      \n\t"
-            "movq                 %%mm0, %%mm1      \n\t"
-            "movq                 %%mm2, %%mm3      \n\t"
-            "movq       (%2, %%"REG_a"), %%mm4      \n\t"
-            "movq      8(%2, %%"REG_a"), %%mm5      \n\t"
-            "punpcklbw            %%mm4, %%mm0      \n\t"
-            "punpckhbw            %%mm4, %%mm1      \n\t"
-            "punpcklbw            %%mm5, %%mm2      \n\t"
-            "punpckhbw            %%mm5, %%mm3      \n\t"
-            MOVNTQ"               %%mm0,   (%0, %%"REG_a", 2)   \n\t"
-            MOVNTQ"               %%mm1,  8(%0, %%"REG_a", 2)   \n\t"
-            MOVNTQ"               %%mm2, 16(%0, %%"REG_a", 2)   \n\t"
-            MOVNTQ"               %%mm3, 24(%0, %%"REG_a", 2)   \n\t"
-            "add                    $16, %%"REG_a"  \n\t"
-            "cmp                     %3, %%"REG_a"  \n\t"
-            " jb                     1b             \n\t"
-            ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
-            : "memory", "%"REG_a
-        );
-#endif
-        for (w= (width&(~15)); w < width; w++) {
-            dest[2*w+0] = src1[w];
-            dest[2*w+1] = src2[w];
-        }
-#else
         for (w=0; w < width; w++) {
             dest[2*w+0] = src1[w];
             dest[2*w+1] = src2[w];
         }
-#endif
         dest += dstStride;
         src1 += src1Stride;
         src2 += src2Stride;
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-            );
-#endif
 }
 
-static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
-                                       uint8_t *dst1, uint8_t *dst2,
-                                       long width, long height,
-                                       long srcStride1, long srcStride2,
-                                       long dstStride1, long dstStride2)
+static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
+                                 uint8_t *dst1, uint8_t *dst2,
+                                 long width, long height,
+                                 long srcStride1, long srcStride2,
+                                 long dstStride1, long dstStride2)
 {
-    x86_reg y;
+    int y;
     long x,w,h;
     w=width/2; h=height/2;
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(
-        PREFETCH" %0    \n\t"
-        PREFETCH" %1    \n\t"
-        ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
-#endif
     for (y=0;y<h;y++) {
         const uint8_t* s1=src1+srcStride1*(y>>1);
         uint8_t* d=dst1+dstStride1*y;
         x=0;
-#if COMPILE_TEMPLATE_MMX
-        for (;x<w-31;x+=32) {
-            __asm__ volatile(
-                PREFETCH"   32%1        \n\t"
-                "movq         %1, %%mm0 \n\t"
-                "movq        8%1, %%mm2 \n\t"
-                "movq       16%1, %%mm4 \n\t"
-                "movq       24%1, %%mm6 \n\t"
-                "movq      %%mm0, %%mm1 \n\t"
-                "movq      %%mm2, %%mm3 \n\t"
-                "movq      %%mm4, %%mm5 \n\t"
-                "movq      %%mm6, %%mm7 \n\t"
-                "punpcklbw %%mm0, %%mm0 \n\t"
-                "punpckhbw %%mm1, %%mm1 \n\t"
-                "punpcklbw %%mm2, %%mm2 \n\t"
-                "punpckhbw %%mm3, %%mm3 \n\t"
-                "punpcklbw %%mm4, %%mm4 \n\t"
-                "punpckhbw %%mm5, %%mm5 \n\t"
-                "punpcklbw %%mm6, %%mm6 \n\t"
-                "punpckhbw %%mm7, %%mm7 \n\t"
-                MOVNTQ"    %%mm0,   %0  \n\t"
-                MOVNTQ"    %%mm1,  8%0  \n\t"
-                MOVNTQ"    %%mm2, 16%0  \n\t"
-                MOVNTQ"    %%mm3, 24%0  \n\t"
-                MOVNTQ"    %%mm4, 32%0  \n\t"
-                MOVNTQ"    %%mm5, 40%0  \n\t"
-                MOVNTQ"    %%mm6, 48%0  \n\t"
-                MOVNTQ"    %%mm7, 56%0"
-                :"=m"(d[2*x])
-                :"m"(s1[x])
-                :"memory");
-        }
-#endif
         for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
     }
     for (y=0;y<h;y++) {
         const uint8_t* s2=src2+srcStride2*(y>>1);
         uint8_t* d=dst2+dstStride2*y;
         x=0;
-#if COMPILE_TEMPLATE_MMX
-        for (;x<w-31;x+=32) {
-            __asm__ volatile(
-                PREFETCH"   32%1        \n\t"
-                "movq         %1, %%mm0 \n\t"
-                "movq        8%1, %%mm2 \n\t"
-                "movq       16%1, %%mm4 \n\t"
-                "movq       24%1, %%mm6 \n\t"
-                "movq      %%mm0, %%mm1 \n\t"
-                "movq      %%mm2, %%mm3 \n\t"
-                "movq      %%mm4, %%mm5 \n\t"
-                "movq      %%mm6, %%mm7 \n\t"
-                "punpcklbw %%mm0, %%mm0 \n\t"
-                "punpckhbw %%mm1, %%mm1 \n\t"
-                "punpcklbw %%mm2, %%mm2 \n\t"
-                "punpckhbw %%mm3, %%mm3 \n\t"
-                "punpcklbw %%mm4, %%mm4 \n\t"
-                "punpckhbw %%mm5, %%mm5 \n\t"
-                "punpcklbw %%mm6, %%mm6 \n\t"
-                "punpckhbw %%mm7, %%mm7 \n\t"
-                MOVNTQ"    %%mm0,   %0  \n\t"
-                MOVNTQ"    %%mm1,  8%0  \n\t"
-                MOVNTQ"    %%mm2, 16%0  \n\t"
-                MOVNTQ"    %%mm3, 24%0  \n\t"
-                MOVNTQ"    %%mm4, 32%0  \n\t"
-                MOVNTQ"    %%mm5, 40%0  \n\t"
-                MOVNTQ"    %%mm6, 48%0  \n\t"
-                MOVNTQ"    %%mm7, 56%0"
-                :"=m"(d[2*x])
-                :"m"(s2[x])
-                :"memory");
-        }
-#endif
         for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-#endif
 }
 
-static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
-                                        uint8_t *dst,
-                                        long width, long height,
-                                        long srcStride1, long srcStride2,
-                                        long srcStride3, long dstStride)
+static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
+                                  const uint8_t *src3, uint8_t *dst,
+                                  long width, long height,
+                                  long srcStride1, long srcStride2,
+                                  long srcStride3, long dstStride)
 {
-    x86_reg x;
+    int x;
     long y,w,h;
     w=width/2; h=height;
     for (y=0;y<h;y++) {
@@ -2486,60 +769,6 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
         const uint8_t* vp=src3+srcStride3*(y>>2);
         uint8_t* d=dst+dstStride*y;
         x=0;
-#if COMPILE_TEMPLATE_MMX
-        for (;x<w-7;x+=8) {
-            __asm__ volatile(
-                PREFETCH"   32(%1, %0)          \n\t"
-                PREFETCH"   32(%2, %0)          \n\t"
-                PREFETCH"   32(%3, %0)          \n\t"
-                "movq      (%1, %0, 4), %%mm0   \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
-                "movq         (%2, %0), %%mm1   \n\t" /* U0U1U2U3U4U5U6U7 */
-                "movq         (%3, %0), %%mm2   \n\t" /* V0V1V2V3V4V5V6V7 */
-                "movq            %%mm0, %%mm3   \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
-                "movq            %%mm1, %%mm4   \n\t" /* U0U1U2U3U4U5U6U7 */
-                "movq            %%mm2, %%mm5   \n\t" /* V0V1V2V3V4V5V6V7 */
-                "punpcklbw       %%mm1, %%mm1   \n\t" /* U0U0 U1U1 U2U2 U3U3 */
-                "punpcklbw       %%mm2, %%mm2   \n\t" /* V0V0 V1V1 V2V2 V3V3 */
-                "punpckhbw       %%mm4, %%mm4   \n\t" /* U4U4 U5U5 U6U6 U7U7 */
-                "punpckhbw       %%mm5, %%mm5   \n\t" /* V4V4 V5V5 V6V6 V7V7 */
-
-                "movq            %%mm1, %%mm6   \n\t"
-                "punpcklbw       %%mm2, %%mm1   \n\t" /* U0V0 U0V0 U1V1 U1V1*/
-                "punpcklbw       %%mm1, %%mm0   \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
-                "punpckhbw       %%mm1, %%mm3   \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
-                MOVNTQ"          %%mm0,  (%4, %0, 8)    \n\t"
-                MOVNTQ"          %%mm3, 8(%4, %0, 8)    \n\t"
-
-                "punpckhbw       %%mm2, %%mm6   \n\t" /* U2V2 U2V2 U3V3 U3V3*/
-                "movq     8(%1, %0, 4), %%mm0   \n\t"
-                "movq            %%mm0, %%mm3   \n\t"
-                "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U2 Y V2 Y U2 Y V2*/
-                "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U3 Y V3 Y U3 Y V3*/
-                MOVNTQ"          %%mm0, 16(%4, %0, 8)   \n\t"
-                MOVNTQ"          %%mm3, 24(%4, %0, 8)   \n\t"
-
-                "movq            %%mm4, %%mm6   \n\t"
-                "movq    16(%1, %0, 4), %%mm0   \n\t"
-                "movq            %%mm0, %%mm3   \n\t"
-                "punpcklbw       %%mm5, %%mm4   \n\t"
-                "punpcklbw       %%mm4, %%mm0   \n\t" /* Y U4 Y V4 Y U4 Y V4*/
-                "punpckhbw       %%mm4, %%mm3   \n\t" /* Y U5 Y V5 Y U5 Y V5*/
-                MOVNTQ"          %%mm0, 32(%4, %0, 8)   \n\t"
-                MOVNTQ"          %%mm3, 40(%4, %0, 8)   \n\t"
-
-                "punpckhbw       %%mm5, %%mm6   \n\t"
-                "movq    24(%1, %0, 4), %%mm0   \n\t"
-                "movq            %%mm0, %%mm3   \n\t"
-                "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U6 Y V6 Y U6 Y V6*/
-                "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U7 Y V7 Y U7 Y V7*/
-                MOVNTQ"          %%mm0, 48(%4, %0, 8)   \n\t"
-                MOVNTQ"          %%mm3, 56(%4, %0, 8)   \n\t"
-
-                : "+r" (x)
-                : "r"(yp), "r" (up), "r"(vp), "r"(d)
-                :"memory");
-        }
-#endif
         for (; x<w; x++) {
             const long x2 = x<<2;
             d[8*x+0] = yp[x2];
@@ -2552,95 +781,27 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
             d[8*x+7] = vp[x];
         }
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-#endif
 }
 
-static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
+static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
 {
     dst +=   count;
     src += 2*count;
     count= - count;
 
-#if COMPILE_TEMPLATE_MMX
-    if(count <= -16) {
-        count += 15;
-        __asm__ volatile(
-            "pcmpeqw       %%mm7, %%mm7        \n\t"
-            "psrlw            $8, %%mm7        \n\t"
-            "1:                                \n\t"
-            "movq -30(%1, %0, 2), %%mm0        \n\t"
-            "movq -22(%1, %0, 2), %%mm1        \n\t"
-            "movq -14(%1, %0, 2), %%mm2        \n\t"
-            "movq  -6(%1, %0, 2), %%mm3        \n\t"
-            "pand          %%mm7, %%mm0        \n\t"
-            "pand          %%mm7, %%mm1        \n\t"
-            "pand          %%mm7, %%mm2        \n\t"
-            "pand          %%mm7, %%mm3        \n\t"
-            "packuswb      %%mm1, %%mm0        \n\t"
-            "packuswb      %%mm3, %%mm2        \n\t"
-            MOVNTQ"        %%mm0,-15(%2, %0)   \n\t"
-            MOVNTQ"        %%mm2,- 7(%2, %0)   \n\t"
-            "add             $16, %0           \n\t"
-            " js 1b                            \n\t"
-            : "+r"(count)
-            : "r"(src), "r"(dst)
-        );
-        count -= 15;
-    }
-#endif
     while(count<0) {
         dst[count]= src[2*count];
         count++;
     }
 }
 
-static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
+                            int count)
 {
     dst0+=   count;
     dst1+=   count;
     src += 4*count;
     count= - count;
-#if COMPILE_TEMPLATE_MMX
-    if(count <= -8) {
-        count += 7;
-        __asm__ volatile(
-            "pcmpeqw       %%mm7, %%mm7        \n\t"
-            "psrlw            $8, %%mm7        \n\t"
-            "1:                                \n\t"
-            "movq -28(%1, %0, 4), %%mm0        \n\t"
-            "movq -20(%1, %0, 4), %%mm1        \n\t"
-            "movq -12(%1, %0, 4), %%mm2        \n\t"
-            "movq  -4(%1, %0, 4), %%mm3        \n\t"
-            "pand          %%mm7, %%mm0        \n\t"
-            "pand          %%mm7, %%mm1        \n\t"
-            "pand          %%mm7, %%mm2        \n\t"
-            "pand          %%mm7, %%mm3        \n\t"
-            "packuswb      %%mm1, %%mm0        \n\t"
-            "packuswb      %%mm3, %%mm2        \n\t"
-            "movq          %%mm0, %%mm1        \n\t"
-            "movq          %%mm2, %%mm3        \n\t"
-            "psrlw            $8, %%mm0        \n\t"
-            "psrlw            $8, %%mm2        \n\t"
-            "pand          %%mm7, %%mm1        \n\t"
-            "pand          %%mm7, %%mm3        \n\t"
-            "packuswb      %%mm2, %%mm0        \n\t"
-            "packuswb      %%mm3, %%mm1        \n\t"
-            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
-            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
-            "add              $8, %0           \n\t"
-            " js 1b                            \n\t"
-            : "+r"(count)
-            : "r"(src), "r"(dst0), "r"(dst1)
-        );
-        count -= 7;
-    }
-#endif
     while(count<0) {
         dst0[count]= src[4*count+0];
         dst1[count]= src[4*count+2];
@@ -2648,52 +809,14 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds
     }
 }
 
-static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
+                               uint8_t *dst0, uint8_t *dst1, int count)
 {
     dst0 +=   count;
     dst1 +=   count;
     src0 += 4*count;
     src1 += 4*count;
     count= - count;
-#ifdef PAVGB
-    if(count <= -8) {
-        count += 7;
-        __asm__ volatile(
-            "pcmpeqw        %%mm7, %%mm7        \n\t"
-            "psrlw             $8, %%mm7        \n\t"
-            "1:                                \n\t"
-            "movq  -28(%1, %0, 4), %%mm0        \n\t"
-            "movq  -20(%1, %0, 4), %%mm1        \n\t"
-            "movq  -12(%1, %0, 4), %%mm2        \n\t"
-            "movq   -4(%1, %0, 4), %%mm3        \n\t"
-            PAVGB" -28(%2, %0, 4), %%mm0        \n\t"
-            PAVGB" -20(%2, %0, 4), %%mm1        \n\t"
-            PAVGB" -12(%2, %0, 4), %%mm2        \n\t"
-            PAVGB" - 4(%2, %0, 4), %%mm3        \n\t"
-            "pand           %%mm7, %%mm0        \n\t"
-            "pand           %%mm7, %%mm1        \n\t"
-            "pand           %%mm7, %%mm2        \n\t"
-            "pand           %%mm7, %%mm3        \n\t"
-            "packuswb       %%mm1, %%mm0        \n\t"
-            "packuswb       %%mm3, %%mm2        \n\t"
-            "movq           %%mm0, %%mm1        \n\t"
-            "movq           %%mm2, %%mm3        \n\t"
-            "psrlw             $8, %%mm0        \n\t"
-            "psrlw             $8, %%mm2        \n\t"
-            "pand           %%mm7, %%mm1        \n\t"
-            "pand           %%mm7, %%mm3        \n\t"
-            "packuswb       %%mm2, %%mm0        \n\t"
-            "packuswb       %%mm3, %%mm1        \n\t"
-            MOVNTQ"         %%mm0,- 7(%4, %0)   \n\t"
-            MOVNTQ"         %%mm1,- 7(%3, %0)   \n\t"
-            "add               $8, %0           \n\t"
-            " js 1b                            \n\t"
-            : "+r"(count)
-            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
-        );
-        count -= 7;
-    }
-#endif
     while(count<0) {
         dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
         dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
@@ -2701,47 +824,13 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u
     }
 }
 
-static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
+                           int count)
 {
     dst0+=   count;
     dst1+=   count;
     src += 4*count;
     count= - count;
-#if COMPILE_TEMPLATE_MMX
-    if(count <= -8) {
-        count += 7;
-        __asm__ volatile(
-            "pcmpeqw       %%mm7, %%mm7        \n\t"
-            "psrlw            $8, %%mm7        \n\t"
-            "1:                                \n\t"
-            "movq -28(%1, %0, 4), %%mm0        \n\t"
-            "movq -20(%1, %0, 4), %%mm1        \n\t"
-            "movq -12(%1, %0, 4), %%mm2        \n\t"
-            "movq  -4(%1, %0, 4), %%mm3        \n\t"
-            "psrlw            $8, %%mm0        \n\t"
-            "psrlw            $8, %%mm1        \n\t"
-            "psrlw            $8, %%mm2        \n\t"
-            "psrlw            $8, %%mm3        \n\t"
-            "packuswb      %%mm1, %%mm0        \n\t"
-            "packuswb      %%mm3, %%mm2        \n\t"
-            "movq          %%mm0, %%mm1        \n\t"
-            "movq          %%mm2, %%mm3        \n\t"
-            "psrlw            $8, %%mm0        \n\t"
-            "psrlw            $8, %%mm2        \n\t"
-            "pand          %%mm7, %%mm1        \n\t"
-            "pand          %%mm7, %%mm3        \n\t"
-            "packuswb      %%mm2, %%mm0        \n\t"
-            "packuswb      %%mm3, %%mm1        \n\t"
-            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
-            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
-            "add              $8, %0           \n\t"
-            " js 1b                            \n\t"
-            : "+r"(count)
-            : "r"(src), "r"(dst0), "r"(dst1)
-        );
-        count -= 7;
-    }
-#endif
     src++;
     while(count<0) {
         dst0[count]= src[4*count+0];
@@ -2750,52 +839,14 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst
     }
 }
 
-static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
+                              uint8_t *dst0, uint8_t *dst1, int count)
 {
     dst0 +=   count;
     dst1 +=   count;
     src0 += 4*count;
     src1 += 4*count;
     count= - count;
-#ifdef PAVGB
-    if(count <= -8) {
-        count += 7;
-        __asm__ volatile(
-            "pcmpeqw        %%mm7, %%mm7        \n\t"
-            "psrlw             $8, %%mm7        \n\t"
-            "1:                                \n\t"
-            "movq  -28(%1, %0, 4), %%mm0        \n\t"
-            "movq  -20(%1, %0, 4), %%mm1        \n\t"
-            "movq  -12(%1, %0, 4), %%mm2        \n\t"
-            "movq   -4(%1, %0, 4), %%mm3        \n\t"
-            PAVGB" -28(%2, %0, 4), %%mm0        \n\t"
-            PAVGB" -20(%2, %0, 4), %%mm1        \n\t"
-            PAVGB" -12(%2, %0, 4), %%mm2        \n\t"
-            PAVGB" - 4(%2, %0, 4), %%mm3        \n\t"
-            "psrlw             $8, %%mm0        \n\t"
-            "psrlw             $8, %%mm1        \n\t"
-            "psrlw             $8, %%mm2        \n\t"
-            "psrlw             $8, %%mm3        \n\t"
-            "packuswb       %%mm1, %%mm0        \n\t"
-            "packuswb       %%mm3, %%mm2        \n\t"
-            "movq           %%mm0, %%mm1        \n\t"
-            "movq           %%mm2, %%mm3        \n\t"
-            "psrlw             $8, %%mm0        \n\t"
-            "psrlw             $8, %%mm2        \n\t"
-            "pand           %%mm7, %%mm1        \n\t"
-            "pand           %%mm7, %%mm3        \n\t"
-            "packuswb       %%mm2, %%mm0        \n\t"
-            "packuswb       %%mm3, %%mm1        \n\t"
-            MOVNTQ"         %%mm0,- 7(%4, %0)   \n\t"
-            MOVNTQ"         %%mm1,- 7(%3, %0)   \n\t"
-            "add               $8, %0           \n\t"
-            " js 1b                            \n\t"
-            : "+r"(count)
-            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
-        );
-        count -= 7;
-    }
-#endif
     src0++;
     src1++;
     while(count<0) {
@@ -2805,17 +856,17 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui
     }
 }
 
-static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                           const uint8_t *src, long width, long height,
+                           long lumStride, long chromStride, long srcStride)
 {
     long y;
     const long chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
-        RENAME(extract_even)(src, ydst, width);
+        extract_even_c(src, ydst, width);
         if(y&1) {
-            RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth);
+            extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
             udst+= chromStride;
             vdst+= chromStride;
         }
@@ -2823,51 +874,37 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
         src += srcStride;
         ydst+= lumStride;
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-#endif
 }
 
-static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                           const uint8_t *src, long width, long height,
+                           long lumStride, long chromStride, long srcStride)
 {
     long y;
     const long chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
-        RENAME(extract_even)(src, ydst, width);
-        RENAME(extract_odd2)(src, udst, vdst, chromWidth);
+        extract_even_c(src, ydst, width);
+        extract_odd2_c(src, udst, vdst, chromWidth);
 
         src += srcStride;
         ydst+= lumStride;
         udst+= chromStride;
         vdst+= chromStride;
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-#endif
 }
 
-static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                           const uint8_t *src, long width, long height,
+                           long lumStride, long chromStride, long srcStride)
 {
     long y;
     const long chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
-        RENAME(extract_even)(src+1, ydst, width);
+        extract_even_c(src + 1, ydst, width);
         if(y&1) {
-            RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
+            extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
             udst+= chromStride;
             vdst+= chromStride;
         }
@@ -2875,73 +912,59 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
         src += srcStride;
         ydst+= lumStride;
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-#endif
 }
 
-static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                           const uint8_t *src, long width, long height,
+                           long lumStride, long chromStride, long srcStride)
 {
     long y;
     const long chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
-        RENAME(extract_even)(src+1, ydst, width);
-        RENAME(extract_even2)(src, udst, vdst, chromWidth);
+        extract_even_c(src + 1, ydst, width);
+        extract_even2_c(src, udst, vdst, chromWidth);
 
         src += srcStride;
         ydst+= lumStride;
         udst+= chromStride;
         vdst+= chromStride;
     }
-#if COMPILE_TEMPLATE_MMX
-    __asm__(
-            EMMS"       \n\t"
-            SFENCE"     \n\t"
-            ::: "memory"
-        );
-#endif
 }
 
-static inline void RENAME(rgb2rgb_init)(void)
+static inline void rgb2rgb_init_c(void)
 {
-    rgb15to16       = RENAME(rgb15to16);
-    rgb15tobgr24    = RENAME(rgb15tobgr24);
-    rgb15to32       = RENAME(rgb15to32);
-    rgb16tobgr24    = RENAME(rgb16tobgr24);
-    rgb16to32       = RENAME(rgb16to32);
-    rgb16to15       = RENAME(rgb16to15);
-    rgb24tobgr16    = RENAME(rgb24tobgr16);
-    rgb24tobgr15    = RENAME(rgb24tobgr15);
-    rgb24tobgr32    = RENAME(rgb24tobgr32);
-    rgb32to16       = RENAME(rgb32to16);
-    rgb32to15       = RENAME(rgb32to15);
-    rgb32tobgr24    = RENAME(rgb32tobgr24);
-    rgb24to15       = RENAME(rgb24to15);
-    rgb24to16       = RENAME(rgb24to16);
-    rgb24tobgr24    = RENAME(rgb24tobgr24);
-    shuffle_bytes_2103 = RENAME(shuffle_bytes_2103);
-    rgb32tobgr16    = RENAME(rgb32tobgr16);
-    rgb32tobgr15    = RENAME(rgb32tobgr15);
-    yv12toyuy2      = RENAME(yv12toyuy2);
-    yv12touyvy      = RENAME(yv12touyvy);
-    yuv422ptoyuy2   = RENAME(yuv422ptoyuy2);
-    yuv422ptouyvy   = RENAME(yuv422ptouyvy);
-    yuy2toyv12      = RENAME(yuy2toyv12);
-    planar2x        = RENAME(planar2x);
-    rgb24toyv12     = RENAME(rgb24toyv12);
-    interleaveBytes = RENAME(interleaveBytes);
-    vu9_to_vu12     = RENAME(vu9_to_vu12);
-    yvu9_to_yuy2    = RENAME(yvu9_to_yuy2);
+    rgb15to16          = rgb15to16_c;
+    rgb15tobgr24       = rgb15tobgr24_c;
+    rgb15to32          = rgb15to32_c;
+    rgb16tobgr24       = rgb16tobgr24_c;
+    rgb16to32          = rgb16to32_c;
+    rgb16to15          = rgb16to15_c;
+    rgb24tobgr16       = rgb24tobgr16_c;
+    rgb24tobgr15       = rgb24tobgr15_c;
+    rgb24tobgr32       = rgb24tobgr32_c;
+    rgb32to16          = rgb32to16_c;
+    rgb32to15          = rgb32to15_c;
+    rgb32tobgr24       = rgb32tobgr24_c;
+    rgb24to15          = rgb24to15_c;
+    rgb24to16          = rgb24to16_c;
+    rgb24tobgr24       = rgb24tobgr24_c;
+    shuffle_bytes_2103 = shuffle_bytes_2103_c;
+    rgb32tobgr16       = rgb32tobgr16_c;
+    rgb32tobgr15       = rgb32tobgr15_c;
+    yv12toyuy2         = yv12toyuy2_c;
+    yv12touyvy         = yv12touyvy_c;
+    yuv422ptoyuy2      = yuv422ptoyuy2_c;
+    yuv422ptouyvy      = yuv422ptouyvy_c;
+    yuy2toyv12         = yuy2toyv12_c;
+    planar2x           = planar2x_c;
+    rgb24toyv12        = rgb24toyv12_c;
+    interleaveBytes    = interleaveBytes_c;
+    vu9_to_vu12        = vu9_to_vu12_c;
+    yvu9_to_yuy2       = yvu9_to_yuy2_c;
 
-    uyvytoyuv420    = RENAME(uyvytoyuv420);
-    uyvytoyuv422    = RENAME(uyvytoyuv422);
-    yuyvtoyuv420    = RENAME(yuyvtoyuv420);
-    yuyvtoyuv422    = RENAME(yuyvtoyuv422);
+    uyvytoyuv420       = uyvytoyuv420_c;
+    uyvytoyuv422       = uyvytoyuv422_c;
+    yuyvtoyuv420       = yuyvtoyuv420_c;
+    yuyvtoyuv422       = yuyvtoyuv422_c;
 }
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index e4e69cf819..e160c526ad 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -122,63 +122,6 @@ add BGR4 output support
 write special BGR->BGR scaler
 */
 
-#if ARCH_X86
-DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
-DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
-DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
-DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
-DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
-DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
-DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
-DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-        0x0103010301030103LL,
-        0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-        0x0602060206020602LL,
-        0x0004000400040004LL,};
-
-DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
-DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
-DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
-DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
-DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
-DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
-
-DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
-
-#ifdef FAST_BGR2YV12
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
-#else
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
-#endif /* FAST_BGR2YV12 */
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
-    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
-    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
-};
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
-
-#endif /* ARCH_X86 */
-
 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
 {  1,   3,   1,   3,   1,   3,   1,   3, },
 {  2,   0,   2,   0,   2,   0,   2,   0, },
@@ -1367,17 +1310,14 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 #define COMPILE_TEMPLATE_AMD3DNOW 0
 #define COMPILE_TEMPLATE_ALTIVEC 0
 
-#if COMPILE_C
-#define RENAME(a) a ## _C
 #include "swscale_template.c"
-#endif
 
 #if COMPILE_ALTIVEC
 #undef RENAME
 #undef COMPILE_TEMPLATE_ALTIVEC
 #define COMPILE_TEMPLATE_ALTIVEC 1
 #define RENAME(a) a ## _altivec
-#include "swscale_template.c"
+#include "ppc/swscale_template.c"
 #endif
 
 #if ARCH_X86
@@ -1392,7 +1332,7 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 #define COMPILE_TEMPLATE_MMX2 0
 #define COMPILE_TEMPLATE_AMD3DNOW 0
 #define RENAME(a) a ## _MMX
-#include "swscale_template.c"
+#include "x86/swscale_template.c"
 #endif
 
 //MMX2 versions
@@ -1405,7 +1345,7 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 #define COMPILE_TEMPLATE_MMX2 1
 #define COMPILE_TEMPLATE_AMD3DNOW 0
 #define RENAME(a) a ## _MMX2
-#include "swscale_template.c"
+#include "x86/swscale_template.c"
 #endif
 
 //3DNOW versions
@@ -1418,44 +1358,36 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 #define COMPILE_TEMPLATE_MMX2 0
 #define COMPILE_TEMPLATE_AMD3DNOW 1
 #define RENAME(a) a ## _3DNow
-#include "swscale_template.c"
+#include "x86/swscale_template.c"
 #endif
 
 #endif //ARCH_X86
 
 SwsFunc ff_getSwsFunc(SwsContext *c)
 {
-#if CONFIG_RUNTIME_CPUDETECT
-    int flags = c->flags;
+    sws_init_swScale_c(c);
 
+#if CONFIG_RUNTIME_CPUDETECT
 #if ARCH_X86
     // ordered per speed fastest first
-    if (flags & SWS_CPU_CAPS_MMX2) {
+    if (c->flags & SWS_CPU_CAPS_MMX2) {
         sws_init_swScale_MMX2(c);
         return swScale_MMX2;
-    } else if (flags & SWS_CPU_CAPS_3DNOW) {
+    } else if (c->flags & SWS_CPU_CAPS_3DNOW) {
         sws_init_swScale_3DNow(c);
         return swScale_3DNow;
-    } else if (flags & SWS_CPU_CAPS_MMX) {
+    } else if (c->flags & SWS_CPU_CAPS_MMX) {
         sws_init_swScale_MMX(c);
         return swScale_MMX;
-    } else {
-        sws_init_swScale_C(c);
-        return swScale_C;
     }
 
 #else
 #if COMPILE_ALTIVEC
-    if (flags & SWS_CPU_CAPS_ALTIVEC) {
+    if (c->flags & SWS_CPU_CAPS_ALTIVEC) {
         sws_init_swScale_altivec(c);
         return swScale_altivec;
-    } else {
-        sws_init_swScale_C(c);
-        return swScale_C;
     }
 #endif
-    sws_init_swScale_C(c);
-    return swScale_C;
 #endif /* ARCH_X86 */
 #else //CONFIG_RUNTIME_CPUDETECT
 #if   COMPILE_TEMPLATE_MMX2
@@ -1470,11 +1402,10 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
 #elif COMPILE_TEMPLATE_ALTIVEC
     sws_init_swScale_altivec(c);
     return swScale_altivec;
-#else
-    sws_init_swScale_C(c);
-    return swScale_C;
 #endif
 #endif //!CONFIG_RUNTIME_CPUDETECT
+
+    return swScale_c;
 }
 
 static void copyPlane(const uint8_t *src, int srcStride,
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 827a32417b..2a73c0e8df 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -18,969 +18,35 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#undef REAL_MOVNTQ
-#undef MOVNTQ
-#undef PAVGB
-#undef PREFETCH
-
-#if COMPILE_TEMPLATE_AMD3DNOW
-#define PREFETCH  "prefetch"
-#elif COMPILE_TEMPLATE_MMX2
-#define PREFETCH "prefetchnta"
-#else
-#define PREFETCH  " # nop"
-#endif
-
-#if COMPILE_TEMPLATE_MMX2
-#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif COMPILE_TEMPLATE_AMD3DNOW
-#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
-#endif
-
-#if COMPILE_TEMPLATE_MMX2
-#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
-#else
-#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
-#endif
-#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
-
-#if COMPILE_TEMPLATE_ALTIVEC
-#include "ppc/swscale_altivec_template.c"
-#endif
-
-#define YSCALEYUV2YV12X(x, offset, dest, width) \
-    __asm__ volatile(\
-        "xor                          %%"REG_a", %%"REG_a"  \n\t"\
-        "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
-        "movq                             %%mm3, %%mm4      \n\t"\
-        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        ".p2align                             4             \n\t" /* FIXME Unroll? */\
-        "1:                                                 \n\t"\
-        "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
-        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
-        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm5      \n\t" /* srcData */\
-        "add                                $16, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        "test                         %%"REG_S", %%"REG_S"  \n\t"\
-        "pmulhw                           %%mm0, %%mm2      \n\t"\
-        "pmulhw                           %%mm0, %%mm5      \n\t"\
-        "paddw                            %%mm2, %%mm3      \n\t"\
-        "paddw                            %%mm5, %%mm4      \n\t"\
-        " jnz                                1b             \n\t"\
-        "psraw                               $3, %%mm3      \n\t"\
-        "psraw                               $3, %%mm4      \n\t"\
-        "packuswb                         %%mm4, %%mm3      \n\t"\
-        MOVNTQ(%%mm3, (%1, %%REGa))\
-        "add                                 $8, %%"REG_a"  \n\t"\
-        "cmp                                 %2, %%"REG_a"  \n\t"\
-        "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
-        "movq                             %%mm3, %%mm4      \n\t"\
-        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        "jb                                  1b             \n\t"\
-        :: "r" (&c->redDither),\
-        "r" (dest), "g" ((x86_reg)width)\
-        : "%"REG_a, "%"REG_d, "%"REG_S\
-    );
-
-#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
-    __asm__ volatile(\
-        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-        "xor                          %%"REG_a", %%"REG_a"  \n\t"\
-        "pxor                             %%mm4, %%mm4      \n\t"\
-        "pxor                             %%mm5, %%mm5      \n\t"\
-        "pxor                             %%mm6, %%mm6      \n\t"\
-        "pxor                             %%mm7, %%mm7      \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        ".p2align                             4             \n\t"\
-        "1:                                                 \n\t"\
-        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
-        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
-        "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t"\
-        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
-        "movq                             %%mm0, %%mm3      \n\t"\
-        "punpcklwd                        %%mm1, %%mm0      \n\t"\
-        "punpckhwd                        %%mm1, %%mm3      \n\t"\
-        "movq       "STR(APCK_COEF)"(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
-        "pmaddwd                          %%mm1, %%mm0      \n\t"\
-        "pmaddwd                          %%mm1, %%mm3      \n\t"\
-        "paddd                            %%mm0, %%mm4      \n\t"\
-        "paddd                            %%mm3, %%mm5      \n\t"\
-        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
-        "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t"\
-        "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
-        "test                         %%"REG_S", %%"REG_S"  \n\t"\
-        "movq                             %%mm2, %%mm0      \n\t"\
-        "punpcklwd                        %%mm3, %%mm2      \n\t"\
-        "punpckhwd                        %%mm3, %%mm0      \n\t"\
-        "pmaddwd                          %%mm1, %%mm2      \n\t"\
-        "pmaddwd                          %%mm1, %%mm0      \n\t"\
-        "paddd                            %%mm2, %%mm6      \n\t"\
-        "paddd                            %%mm0, %%mm7      \n\t"\
-        " jnz                                1b             \n\t"\
-        "psrad                              $16, %%mm4      \n\t"\
-        "psrad                              $16, %%mm5      \n\t"\
-        "psrad                              $16, %%mm6      \n\t"\
-        "psrad                              $16, %%mm7      \n\t"\
-        "movq             "VROUNDER_OFFSET"(%0), %%mm0      \n\t"\
-        "packssdw                         %%mm5, %%mm4      \n\t"\
-        "packssdw                         %%mm7, %%mm6      \n\t"\
-        "paddw                            %%mm0, %%mm4      \n\t"\
-        "paddw                            %%mm0, %%mm6      \n\t"\
-        "psraw                               $3, %%mm4      \n\t"\
-        "psraw                               $3, %%mm6      \n\t"\
-        "packuswb                         %%mm6, %%mm4      \n\t"\
-        MOVNTQ(%%mm4, (%1, %%REGa))\
-        "add                                 $8, %%"REG_a"  \n\t"\
-        "cmp                                 %2, %%"REG_a"  \n\t"\
-        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-        "pxor                             %%mm4, %%mm4      \n\t"\
-        "pxor                             %%mm5, %%mm5      \n\t"\
-        "pxor                             %%mm6, %%mm6      \n\t"\
-        "pxor                             %%mm7, %%mm7      \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        "jb                                  1b             \n\t"\
-        :: "r" (&c->redDither),\
-        "r" (dest), "g" ((x86_reg)width)\
-        : "%"REG_a, "%"REG_d, "%"REG_S\
-    );
-
-#define YSCALEYUV2YV121 \
-    "mov %2, %%"REG_a"                    \n\t"\
-    ".p2align               4             \n\t" /* FIXME Unroll? */\
-    "1:                                   \n\t"\
-    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
-    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
-    "psraw                 $7, %%mm0      \n\t"\
-    "psraw                 $7, %%mm1      \n\t"\
-    "packuswb           %%mm1, %%mm0      \n\t"\
-    MOVNTQ(%%mm0, (%1, %%REGa))\
-    "add                   $8, %%"REG_a"  \n\t"\
-    "jnc                   1b             \n\t"
-
-#define YSCALEYUV2YV121_ACCURATE \
-    "mov %2, %%"REG_a"                    \n\t"\
-    "pcmpeqw %%mm7, %%mm7                 \n\t"\
-    "psrlw                 $15, %%mm7     \n\t"\
-    "psllw                  $6, %%mm7     \n\t"\
-    ".p2align                4            \n\t" /* FIXME Unroll? */\
-    "1:                                   \n\t"\
-    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
-    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
-    "paddsw             %%mm7, %%mm0      \n\t"\
-    "paddsw             %%mm7, %%mm1      \n\t"\
-    "psraw                 $7, %%mm0      \n\t"\
-    "psraw                 $7, %%mm1      \n\t"\
-    "packuswb           %%mm1, %%mm0      \n\t"\
-    MOVNTQ(%%mm0, (%1, %%REGa))\
-    "add                   $8, %%"REG_a"  \n\t"\
-    "jnc                   1b             \n\t"
-
-/*
-    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
-       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
-       "r" (dest), "m" (dstW_reg),
-       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
-    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
-*/
-#define YSCALEYUV2PACKEDX_UV \
-    __asm__ volatile(\
-        "xor                   %%"REG_a", %%"REG_a"     \n\t"\
-        ".p2align                      4                \n\t"\
-        "nop                                            \n\t"\
-        "1:                                             \n\t"\
-        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
-        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-        "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t"\
-        "movq                      %%mm3, %%mm4         \n\t"\
-        ".p2align                      4                \n\t"\
-        "2:                                             \n\t"\
-        "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
-        "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
-        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
-        "add                         $16, %%"REG_d"     \n\t"\
-        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-        "pmulhw                    %%mm0, %%mm2         \n\t"\
-        "pmulhw                    %%mm0, %%mm5         \n\t"\
-        "paddw                     %%mm2, %%mm3         \n\t"\
-        "paddw                     %%mm5, %%mm4         \n\t"\
-        "test                  %%"REG_S", %%"REG_S"     \n\t"\
-        " jnz                         2b                \n\t"\
-
-#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
-    "lea                "offset"(%0), %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "movq      "VROUNDER_OFFSET"(%0), "#dst1"       \n\t"\
-    "movq                    "#dst1", "#dst2"       \n\t"\
-    ".p2align                      4                \n\t"\
-    "2:                                             \n\t"\
-    "movq               8(%%"REG_d"), "#coeff"      \n\t" /* filterCoeff */\
-    "movq  (%%"REG_S", %%"REG_a", 2), "#src1"       \n\t" /* Y1srcData */\
-    "movq 8(%%"REG_S", %%"REG_a", 2), "#src2"       \n\t" /* Y2srcData */\
-    "add                         $16, %%"REG_d"            \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "pmulhw                 "#coeff", "#src1"       \n\t"\
-    "pmulhw                 "#coeff", "#src2"       \n\t"\
-    "paddw                   "#src1", "#dst1"       \n\t"\
-    "paddw                   "#src2", "#dst2"       \n\t"\
-    "test                  %%"REG_S", %%"REG_S"     \n\t"\
-    " jnz                         2b                \n\t"\
-
-#define YSCALEYUV2PACKEDX \
-    YSCALEYUV2PACKEDX_UV \
-    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
-
-#define YSCALEYUV2PACKEDX_END                     \
-        :: "r" (&c->redDither),                   \
-            "m" (dummy), "m" (dummy), "m" (dummy),\
-            "r" (dest), "m" (dstW_reg)            \
-        : "%"REG_a, "%"REG_d, "%"REG_S            \
-    );
-
-#define YSCALEYUV2PACKEDX_ACCURATE_UV \
-    __asm__ volatile(\
-        "xor %%"REG_a", %%"REG_a"                       \n\t"\
-        ".p2align                      4                \n\t"\
-        "nop                                            \n\t"\
-        "1:                                             \n\t"\
-        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
-        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-        "pxor                      %%mm4, %%mm4         \n\t"\
-        "pxor                      %%mm5, %%mm5         \n\t"\
-        "pxor                      %%mm6, %%mm6         \n\t"\
-        "pxor                      %%mm7, %%mm7         \n\t"\
-        ".p2align                      4                \n\t"\
-        "2:                                             \n\t"\
-        "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
-        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
-        "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
-        "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
-        "movq                      %%mm0, %%mm3         \n\t"\
-        "punpcklwd                 %%mm1, %%mm0         \n\t"\
-        "punpckhwd                 %%mm1, %%mm3         \n\t"\
-        "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\
-        "pmaddwd                   %%mm1, %%mm0         \n\t"\
-        "pmaddwd                   %%mm1, %%mm3         \n\t"\
-        "paddd                     %%mm0, %%mm4         \n\t"\
-        "paddd                     %%mm3, %%mm5         \n\t"\
-        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
-        "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
-        "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
-        "test                  %%"REG_S", %%"REG_S"     \n\t"\
-        "movq                      %%mm2, %%mm0         \n\t"\
-        "punpcklwd                 %%mm3, %%mm2         \n\t"\
-        "punpckhwd                 %%mm3, %%mm0         \n\t"\
-        "pmaddwd                   %%mm1, %%mm2         \n\t"\
-        "pmaddwd                   %%mm1, %%mm0         \n\t"\
-        "paddd                     %%mm2, %%mm6         \n\t"\
-        "paddd                     %%mm0, %%mm7         \n\t"\
-        " jnz                         2b                \n\t"\
-        "psrad                       $16, %%mm4         \n\t"\
-        "psrad                       $16, %%mm5         \n\t"\
-        "psrad                       $16, %%mm6         \n\t"\
-        "psrad                       $16, %%mm7         \n\t"\
-        "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
-        "packssdw                  %%mm5, %%mm4         \n\t"\
-        "packssdw                  %%mm7, %%mm6         \n\t"\
-        "paddw                     %%mm0, %%mm4         \n\t"\
-        "paddw                     %%mm0, %%mm6         \n\t"\
-        "movq                      %%mm4, "U_TEMP"(%0)  \n\t"\
-        "movq                      %%mm6, "V_TEMP"(%0)  \n\t"\
-
-#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
-    "lea                "offset"(%0), %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "pxor                      %%mm1, %%mm1         \n\t"\
-    "pxor                      %%mm5, %%mm5         \n\t"\
-    "pxor                      %%mm7, %%mm7         \n\t"\
-    "pxor                      %%mm6, %%mm6         \n\t"\
-    ".p2align                      4                \n\t"\
-    "2:                                             \n\t"\
-    "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
-    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
-    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
-    "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
-    "movq                      %%mm0, %%mm3         \n\t"\
-    "punpcklwd                 %%mm4, %%mm0         \n\t"\
-    "punpckhwd                 %%mm4, %%mm3         \n\t"\
-    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
-    "pmaddwd                   %%mm4, %%mm0         \n\t"\
-    "pmaddwd                   %%mm4, %%mm3         \n\t"\
-    "paddd                     %%mm0, %%mm1         \n\t"\
-    "paddd                     %%mm3, %%mm5         \n\t"\
-    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
-    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
-    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
-    "test                  %%"REG_S", %%"REG_S"     \n\t"\
-    "movq                      %%mm2, %%mm0         \n\t"\
-    "punpcklwd                 %%mm3, %%mm2         \n\t"\
-    "punpckhwd                 %%mm3, %%mm0         \n\t"\
-    "pmaddwd                   %%mm4, %%mm2         \n\t"\
-    "pmaddwd                   %%mm4, %%mm0         \n\t"\
-    "paddd                     %%mm2, %%mm7         \n\t"\
-    "paddd                     %%mm0, %%mm6         \n\t"\
-    " jnz                         2b                \n\t"\
-    "psrad                       $16, %%mm1         \n\t"\
-    "psrad                       $16, %%mm5         \n\t"\
-    "psrad                       $16, %%mm7         \n\t"\
-    "psrad                       $16, %%mm6         \n\t"\
-    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
-    "packssdw                  %%mm5, %%mm1         \n\t"\
-    "packssdw                  %%mm6, %%mm7         \n\t"\
-    "paddw                     %%mm0, %%mm1         \n\t"\
-    "paddw                     %%mm0, %%mm7         \n\t"\
-    "movq               "U_TEMP"(%0), %%mm3         \n\t"\
-    "movq               "V_TEMP"(%0), %%mm4         \n\t"\
-
-#define YSCALEYUV2PACKEDX_ACCURATE \
-    YSCALEYUV2PACKEDX_ACCURATE_UV \
-    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
-
-#define YSCALEYUV2RGBX \
-    "psubw  "U_OFFSET"(%0), %%mm3       \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"(%0), %%mm4       \n\t" /* (V-128)8*/\
-    "movq            %%mm3, %%mm2       \n\t" /* (U-128)8*/\
-    "movq            %%mm4, %%mm5       \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"(%0), %%mm3       \n\t"\
-    "pmulhw "VG_COEFF"(%0), %%mm4       \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "pmulhw "UB_COEFF"(%0), %%mm2       \n\t"\
-    "pmulhw "VR_COEFF"(%0), %%mm5       \n\t"\
-    "psubw  "Y_OFFSET"(%0), %%mm1       \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"(%0), %%mm7       \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"(%0), %%mm1       \n\t"\
-    "pmulhw  "Y_COEFF"(%0), %%mm7       \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw           %%mm3, %%mm4       \n\t"\
-    "movq            %%mm2, %%mm0       \n\t"\
-    "movq            %%mm5, %%mm6       \n\t"\
-    "movq            %%mm4, %%mm3       \n\t"\
-    "punpcklwd       %%mm2, %%mm2       \n\t"\
-    "punpcklwd       %%mm5, %%mm5       \n\t"\
-    "punpcklwd       %%mm4, %%mm4       \n\t"\
-    "paddw           %%mm1, %%mm2       \n\t"\
-    "paddw           %%mm1, %%mm5       \n\t"\
-    "paddw           %%mm1, %%mm4       \n\t"\
-    "punpckhwd       %%mm0, %%mm0       \n\t"\
-    "punpckhwd       %%mm6, %%mm6       \n\t"\
-    "punpckhwd       %%mm3, %%mm3       \n\t"\
-    "paddw           %%mm7, %%mm0       \n\t"\
-    "paddw           %%mm7, %%mm6       \n\t"\
-    "paddw           %%mm7, %%mm3       \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb        %%mm0, %%mm2       \n\t"\
-    "packuswb        %%mm6, %%mm5       \n\t"\
-    "packuswb        %%mm3, %%mm4       \n\t"\
-
-#define REAL_YSCALEYUV2PACKED(index, c) \
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
-    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
-    "psraw                $3, %%mm0                           \n\t"\
-    "psraw                $3, %%mm1                           \n\t"\
-    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
-    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
-    "xor            "#index", "#index"                        \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
-    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
-    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
-    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
-    "psraw                $7, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $7, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
-    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
-    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
-    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
-    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "psraw                $7, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $7, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-
-#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
-
-#define REAL_YSCALEYUV2RGB_UV(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
-    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
-    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
-    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
-    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-
-#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
-    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
-    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
-    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
-    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
-    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-
-#define REAL_YSCALEYUV2RGB_COEFF(c) \
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
-
-#define YSCALEYUV2RGB(index, c) \
-    REAL_YSCALEYUV2RGB_UV(index, c) \
-    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
-    REAL_YSCALEYUV2RGB_COEFF(c)
-
-#define REAL_YSCALEYUV2PACKED1(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "psraw                $7, %%mm3     \n\t" \
-    "psraw                $7, %%mm4     \n\t" \
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $7, %%mm1     \n\t" \
-    "psraw                $7, %%mm7     \n\t" \
-
-#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
-
-#define REAL_YSCALEYUV2RGB1(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
-
-#define REAL_YSCALEYUV2PACKED1b(index, c) \
-    "xor "#index", "#index"             \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
-    "psrlw                $8, %%mm3     \n\t" \
-    "psrlw                $8, %%mm4     \n\t" \
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $7, %%mm1     \n\t" \
-    "psraw                $7, %%mm7     \n\t"
-#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
-
-// do vertical chrominance interpolation
-#define REAL_YSCALEYUV2RGB1b(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
-    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
-    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
-
-#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
-    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
-    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
-    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
-    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
-    "packuswb          %%mm1, %%mm7     \n\t"
-#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
-
-#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
-    "movq       "#b", "#q2"     \n\t" /* B */\
-    "movq       "#r", "#t"      \n\t" /* R */\
-    "punpcklbw  "#g", "#b"      \n\t" /* GBGBGBGB 0 */\
-    "punpcklbw  "#a", "#r"      \n\t" /* ARARARAR 0 */\
-    "punpckhbw  "#g", "#q2"     \n\t" /* GBGBGBGB 2 */\
-    "punpckhbw  "#a", "#t"      \n\t" /* ARARARAR 2 */\
-    "movq       "#b", "#q0"     \n\t" /* GBGBGBGB 0 */\
-    "movq      "#q2", "#q3"     \n\t" /* GBGBGBGB 2 */\
-    "punpcklwd  "#r", "#q0"     \n\t" /* ARGBARGB 0 */\
-    "punpckhwd  "#r", "#b"      \n\t" /* ARGBARGB 1 */\
-    "punpcklwd  "#t", "#q2"     \n\t" /* ARGBARGB 2 */\
-    "punpckhwd  "#t", "#q3"     \n\t" /* ARGBARGB 3 */\
-\
-    MOVNTQ(   q0,   (dst, index, 4))\
-    MOVNTQ(    b,  8(dst, index, 4))\
-    MOVNTQ(   q2, 16(dst, index, 4))\
-    MOVNTQ(   q3, 24(dst, index, 4))\
-\
-    "add      $8, "#index"      \n\t"\
-    "cmp "#dstw", "#index"      \n\t"\
-    " jb      1b                \n\t"
-#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
-
-#define REAL_WRITERGB16(dst, dstw, index) \
-    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
-    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
-    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
-    "psrlq           $3, %%mm2  \n\t"\
-\
-    "movq         %%mm2, %%mm1  \n\t"\
-    "movq         %%mm4, %%mm3  \n\t"\
-\
-    "punpcklbw    %%mm7, %%mm3  \n\t"\
-    "punpcklbw    %%mm5, %%mm2  \n\t"\
-    "punpckhbw    %%mm7, %%mm4  \n\t"\
-    "punpckhbw    %%mm5, %%mm1  \n\t"\
-\
-    "psllq           $3, %%mm3  \n\t"\
-    "psllq           $3, %%mm4  \n\t"\
-\
-    "por          %%mm3, %%mm2  \n\t"\
-    "por          %%mm4, %%mm1  \n\t"\
-\
-    MOVNTQ(%%mm2,  (dst, index, 2))\
-    MOVNTQ(%%mm1, 8(dst, index, 2))\
-\
-    "add             $8, "#index"   \n\t"\
-    "cmp        "#dstw", "#index"   \n\t"\
-    " jb             1b             \n\t"
-#define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
-
-#define REAL_WRITERGB15(dst, dstw, index) \
-    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
-    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
-    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
-    "psrlq           $3, %%mm2  \n\t"\
-    "psrlq           $1, %%mm5  \n\t"\
-\
-    "movq         %%mm2, %%mm1  \n\t"\
-    "movq         %%mm4, %%mm3  \n\t"\
-\
-    "punpcklbw    %%mm7, %%mm3  \n\t"\
-    "punpcklbw    %%mm5, %%mm2  \n\t"\
-    "punpckhbw    %%mm7, %%mm4  \n\t"\
-    "punpckhbw    %%mm5, %%mm1  \n\t"\
-\
-    "psllq           $2, %%mm3  \n\t"\
-    "psllq           $2, %%mm4  \n\t"\
-\
-    "por          %%mm3, %%mm2  \n\t"\
-    "por          %%mm4, %%mm1  \n\t"\
-\
-    MOVNTQ(%%mm2,  (dst, index, 2))\
-    MOVNTQ(%%mm1, 8(dst, index, 2))\
-\
-    "add             $8, "#index"   \n\t"\
-    "cmp        "#dstw", "#index"   \n\t"\
-    " jb             1b             \n\t"
-#define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
-
-#define WRITEBGR24OLD(dst, dstw, index) \
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
-    "movq      %%mm2, %%mm1             \n\t" /* B */\
-    "movq      %%mm5, %%mm6             \n\t" /* R */\
-    "punpcklbw %%mm4, %%mm2             \n\t" /* GBGBGBGB 0 */\
-    "punpcklbw %%mm7, %%mm5             \n\t" /* 0R0R0R0R 0 */\
-    "punpckhbw %%mm4, %%mm1             \n\t" /* GBGBGBGB 2 */\
-    "punpckhbw %%mm7, %%mm6             \n\t" /* 0R0R0R0R 2 */\
-    "movq      %%mm2, %%mm0             \n\t" /* GBGBGBGB 0 */\
-    "movq      %%mm1, %%mm3             \n\t" /* GBGBGBGB 2 */\
-    "punpcklwd %%mm5, %%mm0             \n\t" /* 0RGB0RGB 0 */\
-    "punpckhwd %%mm5, %%mm2             \n\t" /* 0RGB0RGB 1 */\
-    "punpcklwd %%mm6, %%mm1             \n\t" /* 0RGB0RGB 2 */\
-    "punpckhwd %%mm6, %%mm3             \n\t" /* 0RGB0RGB 3 */\
-\
-    "movq      %%mm0, %%mm4             \n\t" /* 0RGB0RGB 0 */\
-    "psrlq        $8, %%mm0             \n\t" /* 00RGB0RG 0 */\
-    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 0 */\
-    "pand "MANGLE(bm11111000)", %%mm0   \n\t" /* 00RGB000 0.5 */\
-    "por       %%mm4, %%mm0             \n\t" /* 00RGBRGB 0 */\
-    "movq      %%mm2, %%mm4             \n\t" /* 0RGB0RGB 1 */\
-    "psllq       $48, %%mm2             \n\t" /* GB000000 1 */\
-    "por       %%mm2, %%mm0             \n\t" /* GBRGBRGB 0 */\
-\
-    "movq      %%mm4, %%mm2             \n\t" /* 0RGB0RGB 1 */\
-    "psrld       $16, %%mm4             \n\t" /* 000R000R 1 */\
-    "psrlq       $24, %%mm2             \n\t" /* 0000RGB0 1.5 */\
-    "por       %%mm4, %%mm2             \n\t" /* 000RRGBR 1 */\
-    "pand "MANGLE(bm00001111)", %%mm2   \n\t" /* 0000RGBR 1 */\
-    "movq      %%mm1, %%mm4             \n\t" /* 0RGB0RGB 2 */\
-    "psrlq        $8, %%mm1             \n\t" /* 00RGB0RG 2 */\
-    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 2 */\
-    "pand "MANGLE(bm11111000)", %%mm1   \n\t" /* 00RGB000 2.5 */\
-    "por       %%mm4, %%mm1             \n\t" /* 00RGBRGB 2 */\
-    "movq      %%mm1, %%mm4             \n\t" /* 00RGBRGB 2 */\
-    "psllq       $32, %%mm1             \n\t" /* BRGB0000 2 */\
-    "por       %%mm1, %%mm2             \n\t" /* BRGBRGBR 1 */\
-\
-    "psrlq       $32, %%mm4             \n\t" /* 000000RG 2.5 */\
-    "movq      %%mm3, %%mm5             \n\t" /* 0RGB0RGB 3 */\
-    "psrlq        $8, %%mm3             \n\t" /* 00RGB0RG 3 */\
-    "pand "MANGLE(bm00000111)", %%mm5   \n\t" /* 00000RGB 3 */\
-    "pand "MANGLE(bm11111000)", %%mm3   \n\t" /* 00RGB000 3.5 */\
-    "por       %%mm5, %%mm3             \n\t" /* 00RGBRGB 3 */\
-    "psllq       $16, %%mm3             \n\t" /* RGBRGB00 3 */\
-    "por       %%mm4, %%mm3             \n\t" /* RGBRGBRG 2.5 */\
-\
-    MOVNTQ(%%mm0,   (dst))\
-    MOVNTQ(%%mm2,  8(dst))\
-    MOVNTQ(%%mm3, 16(dst))\
-    "add         $24, "#dst"            \n\t"\
-\
-    "add          $8, "#index"          \n\t"\
-    "cmp     "#dstw", "#index"          \n\t"\
-    " jb          1b                    \n\t"
-
-#define WRITEBGR24MMX(dst, dstw, index) \
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
-    "movq      %%mm2, %%mm1     \n\t" /* B */\
-    "movq      %%mm5, %%mm6     \n\t" /* R */\
-    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
-    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
-    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
-    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
-    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
-    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
-    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
-    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
-    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
-    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
-\
-    "movq      %%mm0, %%mm4     \n\t" /* 0RGB0RGB 0 */\
-    "movq      %%mm2, %%mm6     \n\t" /* 0RGB0RGB 1 */\
-    "movq      %%mm1, %%mm5     \n\t" /* 0RGB0RGB 2 */\
-    "movq      %%mm3, %%mm7     \n\t" /* 0RGB0RGB 3 */\
-\
-    "psllq       $40, %%mm0     \n\t" /* RGB00000 0 */\
-    "psllq       $40, %%mm2     \n\t" /* RGB00000 1 */\
-    "psllq       $40, %%mm1     \n\t" /* RGB00000 2 */\
-    "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */\
-\
-    "punpckhdq %%mm4, %%mm0     \n\t" /* 0RGBRGB0 0 */\
-    "punpckhdq %%mm6, %%mm2     \n\t" /* 0RGBRGB0 1 */\
-    "punpckhdq %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */\
-    "punpckhdq %%mm7, %%mm3     \n\t" /* 0RGBRGB0 3 */\
-\
-    "psrlq        $8, %%mm0     \n\t" /* 00RGBRGB 0 */\
-    "movq      %%mm2, %%mm6     \n\t" /* 0RGBRGB0 1 */\
-    "psllq       $40, %%mm2     \n\t" /* GB000000 1 */\
-    "por       %%mm2, %%mm0     \n\t" /* GBRGBRGB 0 */\
-    MOVNTQ(%%mm0, (dst))\
-\
-    "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */\
-    "movq      %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */\
-    "psllq       $24, %%mm1     \n\t" /* BRGB0000 2 */\
-    "por       %%mm1, %%mm6     \n\t" /* BRGBRGBR 1 */\
-    MOVNTQ(%%mm6, 8(dst))\
-\
-    "psrlq       $40, %%mm5     \n\t" /* 000000RG 2 */\
-    "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */\
-    "por       %%mm3, %%mm5     \n\t" /* RGBRGBRG 2 */\
-    MOVNTQ(%%mm5, 16(dst))\
-\
-    "add         $24, "#dst"    \n\t"\
-\
-    "add          $8, "#index"  \n\t"\
-    "cmp     "#dstw", "#index"  \n\t"\
-    " jb          1b            \n\t"
-
-#define WRITEBGR24MMX2(dst, dstw, index) \
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
-    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
-    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
-    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
-    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
-    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
-\
-    "pand   %%mm0, %%mm1        \n\t" /*    B2        B1       B0 */\
-    "pand   %%mm0, %%mm3        \n\t" /*    G2        G1       G0 */\
-    "pand   %%mm7, %%mm6        \n\t" /*       R1        R0       */\
-\
-    "psllq     $8, %%mm3        \n\t" /* G2        G1       G0    */\
-    "por    %%mm1, %%mm6        \n\t"\
-    "por    %%mm3, %%mm6        \n\t"\
-    MOVNTQ(%%mm6, (dst))\
-\
-    "psrlq     $8, %%mm4        \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
-    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
-    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
-    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
-\
-    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
-    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
-    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
-\
-    "por    %%mm1, %%mm3        \n\t" /* B5    G4 B4     G3 B3    */\
-    "por    %%mm3, %%mm6        \n\t"\
-    MOVNTQ(%%mm6, 8(dst))\
-\
-    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6  B7 B6 B6 B7 */\
-    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
-    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
-\
-    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
-    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
-    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
-\
-    "por    %%mm1, %%mm3        \n\t"\
-    "por    %%mm3, %%mm6        \n\t"\
-    MOVNTQ(%%mm6, 16(dst))\
-\
-    "add      $24, "#dst"       \n\t"\
-\
-    "add       $8, "#index"     \n\t"\
-    "cmp  "#dstw", "#index"     \n\t"\
-    " jb       1b               \n\t"
-
-#if COMPILE_TEMPLATE_MMX2
-#undef WRITEBGR24
-#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
-#else
-#undef WRITEBGR24
-#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
-#endif
-
-#define REAL_WRITEYUY2(dst, dstw, index) \
-    "packuswb  %%mm3, %%mm3     \n\t"\
-    "packuswb  %%mm4, %%mm4     \n\t"\
-    "packuswb  %%mm7, %%mm1     \n\t"\
-    "punpcklbw %%mm4, %%mm3     \n\t"\
-    "movq      %%mm1, %%mm7     \n\t"\
-    "punpcklbw %%mm3, %%mm1     \n\t"\
-    "punpckhbw %%mm3, %%mm7     \n\t"\
-\
-    MOVNTQ(%%mm1, (dst, index, 2))\
-    MOVNTQ(%%mm7, 8(dst, index, 2))\
-\
-    "add          $8, "#index"  \n\t"\
-    "cmp     "#dstw", "#index"  \n\t"\
-    " jb          1b            \n\t"
-#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
-
-
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
+                              const int16_t **lumSrc, int lumFilterSize,
+                              const int16_t *chrFilter, const int16_t **chrSrc,
+                              int chrFilterSize, const int16_t **alpSrc,
+                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                              uint8_t *aDest, long dstW, long chrDstW)
 {
-#if COMPILE_TEMPLATE_MMX
-    if(!(c->flags & SWS_BITEXACT)) {
-        if (c->flags & SWS_ACCURATE_RND) {
-            if (uDest) {
-                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
-            }
-            if (CONFIG_SWSCALE_ALPHA && aDest) {
-                YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
-            }
-
-            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-        } else {
-            if (uDest) {
-                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
-            }
-            if (CONFIG_SWSCALE_ALPHA && aDest) {
-                YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
-            }
-
-            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-        }
-        return;
-    }
-#endif
-#if COMPILE_TEMPLATE_ALTIVEC
-    yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
-                          chrFilter, chrSrc, chrFilterSize,
-                          dest, uDest, vDest, dstW, chrDstW);
-#else //COMPILE_TEMPLATE_ALTIVEC
     yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
                 chrFilter, chrSrc, chrFilterSize,
                 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
-#endif //!COMPILE_TEMPLATE_ALTIVEC
 }
 
-static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
+static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
+                               const int16_t **lumSrc, int lumFilterSize,
+                               const int16_t *chrFilter, const int16_t **chrSrc,
+                               int chrFilterSize, uint8_t *dest, uint8_t *uDest,
+                               int dstW, int chrDstW, enum PixelFormat dstFormat)
 {
     yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
                  chrFilter, chrSrc, chrFilterSize,
                  dest, uDest, dstW, chrDstW, dstFormat);
 }
 
-static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
+                              const int16_t *chrSrc, const int16_t *alpSrc,
+                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                              uint8_t *aDest, long dstW, long chrDstW)
 {
     int i;
-#if COMPILE_TEMPLATE_MMX
-    if(!(c->flags & SWS_BITEXACT)) {
-        long p= 4;
-        const int16_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
-        uint8_t *dst[4]= {aDest, dest, uDest, vDest};
-        x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
-
-        if (c->flags & SWS_ACCURATE_RND) {
-            while(p--) {
-                if (dst[p]) {
-                    __asm__ volatile(
-                        YSCALEYUV2YV121_ACCURATE
-                        :: "r" (src[p]), "r" (dst[p] + counter[p]),
-                        "g" (-counter[p])
-                        : "%"REG_a
-                    );
-                }
-            }
-        } else {
-            while(p--) {
-                if (dst[p]) {
-                    __asm__ volatile(
-                        YSCALEYUV2YV121
-                        :: "r" (src[p]), "r" (dst[p] + counter[p]),
-                        "g" (-counter[p])
-                        : "%"REG_a
-                    );
-                }
-            }
-        }
-        return;
-    }
-#endif
     for (i=0; i<dstW; i++) {
         int val= (lumSrc[i]+64)>>7;
 
@@ -1019,184 +85,12 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
 /**
  * vertical scale YV12 to RGB
  */
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
+                                 const int16_t **lumSrc, int lumFilterSize,
+                                 const int16_t *chrFilter, const int16_t **chrSrc,
+                                 int chrFilterSize, const int16_t **alpSrc,
+                                 uint8_t *dest, long dstW, long dstY)
 {
-#if COMPILE_TEMPLATE_MMX
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-    if(!(c->flags & SWS_BITEXACT)) {
-        if (c->flags & SWS_ACCURATE_RND) {
-            switch(c->dstFormat) {
-            case PIX_FMT_RGB32:
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    YSCALEYUV2PACKEDX_ACCURATE
-                    YSCALEYUV2RGBX
-                    "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
-                    "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
-                    "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
-                    YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
-                    "movq               "Y_TEMP"(%0), %%mm5         \n\t"
-                    "psraw                        $3, %%mm1         \n\t"
-                    "psraw                        $3, %%mm7         \n\t"
-                    "packuswb                  %%mm7, %%mm1         \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
-
-                    YSCALEYUV2PACKEDX_END
-                } else {
-                    YSCALEYUV2PACKEDX_ACCURATE
-                    YSCALEYUV2RGBX
-                    "pcmpeqd %%mm7, %%mm7 \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-
-                    YSCALEYUV2PACKEDX_END
-                }
-                return;
-            case PIX_FMT_BGR24:
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
-                "add %4, %%"REG_c"                        \n\t"
-                WRITEBGR24(%%REGc, %5, %%REGa)
-
-
-                :: "r" (&c->redDither),
-                "m" (dummy), "m" (dummy), "m" (dummy),
-                "r" (dest), "m" (dstW_reg)
-                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
-                );
-                return;
-            case PIX_FMT_RGB555:
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
-#endif
-
-                WRITERGB15(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_RGB565:
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
-#endif
-
-                WRITERGB16(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_YUYV422:
-                YSCALEYUV2PACKEDX_ACCURATE
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-
-                "psraw $3, %%mm3    \n\t"
-                "psraw $3, %%mm4    \n\t"
-                "psraw $3, %%mm1    \n\t"
-                "psraw $3, %%mm7    \n\t"
-                WRITEYUY2(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            }
-        } else {
-            switch(c->dstFormat) {
-            case PIX_FMT_RGB32:
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    YSCALEYUV2PACKEDX
-                    YSCALEYUV2RGBX
-                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
-                    "psraw                        $3, %%mm1         \n\t"
-                    "psraw                        $3, %%mm7         \n\t"
-                    "packuswb                  %%mm7, %%mm1         \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-                    YSCALEYUV2PACKEDX_END
-                } else {
-                    YSCALEYUV2PACKEDX
-                    YSCALEYUV2RGBX
-                    "pcmpeqd %%mm7, %%mm7 \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                    YSCALEYUV2PACKEDX_END
-                }
-                return;
-            case PIX_FMT_BGR24:
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor                    %%mm7, %%mm7       \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
-                "add                        %4, %%"REG_c"   \n\t"
-                WRITEBGR24(%%REGc, %5, %%REGa)
-
-                :: "r" (&c->redDither),
-                "m" (dummy), "m" (dummy), "m" (dummy),
-                "r" (dest),  "m" (dstW_reg)
-                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
-                );
-                return;
-            case PIX_FMT_RGB555:
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
-#endif
-
-                WRITERGB15(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_RGB565:
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
-#endif
-
-                WRITERGB16(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_YUYV422:
-                YSCALEYUV2PACKEDX
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-
-                "psraw $3, %%mm3    \n\t"
-                "psraw $3, %%mm4    \n\t"
-                "psraw $3, %%mm1    \n\t"
-                "psraw $3, %%mm7    \n\t"
-                WRITEYUY2(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            }
-        }
-    }
-#endif /* COMPILE_TEMPLATE_MMX */
-#if COMPILE_TEMPLATE_ALTIVEC
-    /* The following list of supported dstFormat values should
-       match what's found in the body of ff_yuv2packedX_altivec() */
-    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
-         (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
-          c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
-          c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
-            ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
-                                   chrFilter, chrSrc, chrFilterSize,
-                                   dest, dstW, dstY);
-    else
-#endif
         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
                        chrFilter, chrSrc, chrFilterSize,
                        alpSrc, dest, dstW, dstY);
@@ -1205,157 +99,27 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
 /**
  * vertical bilinear scale YV12 to RGB
  */
-static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                          const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *buf1, const uint16_t *uvbuf0,
+                                 const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                 const uint16_t *abuf1, uint8_t *dest, int dstW,
+                                 int yalpha, int uvalpha, int y)
 {
     int  yalpha1=4095- yalpha;
     int uvalpha1=4095-uvalpha;
     int i;
 
-#if COMPILE_TEMPLATE_MMX
-    if(!(c->flags & SWS_BITEXACT)) {
-        switch(c->dstFormat) {
-        //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-        case PIX_FMT_RGB32:
-            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-#if ARCH_X86_64
-                __asm__ volatile(
-                    YSCALEYUV2RGB(%%r8, %5)
-                    YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
-                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "packuswb            %%mm7, %%mm1       \n\t"
-                    WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
-                    "a" (&c->redDither)
-                    ,"r" (abuf0), "r" (abuf1)
-                    : "%r8"
-                );
-#else
-                c->u_temp=(intptr_t)abuf0;
-                c->v_temp=(intptr_t)abuf1;
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB(%%REGBP, %5)
-                    "push                   %0              \n\t"
-                    "push                   %1              \n\t"
-                    "mov          "U_TEMP"(%5), %0          \n\t"
-                    "mov          "V_TEMP"(%5), %1          \n\t"
-                    YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
-                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "packuswb            %%mm7, %%mm1       \n\t"
-                    "pop                    %1              \n\t"
-                    "pop                    %0              \n\t"
-                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-#endif
-            } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB(%%REGBP, %5)
-                    "pcmpeqd %%mm7, %%mm7                   \n\t"
-                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-            }
-            return;
-        case PIX_FMT_BGR24:
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
-            return;
-        case PIX_FMT_RGB555:
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-
-                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
-            return;
-        case PIX_FMT_RGB565:
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-
-                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
-            return;
-        case PIX_FMT_YUYV422:
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov %4, %%"REG_b"                        \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2PACKED(%%REGBP, %5)
-                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
-            return;
-        default: break;
-        }
-    }
-#endif //COMPILE_TEMPLATE_MMX
     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
 }
 
 /**
  * YV12 to RGB without scaling or interpolating
  */
-static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                          const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                 const uint16_t *abuf0, uint8_t *dest, int dstW,
+                                 int uvalpha, enum PixelFormat dstFormat,
+                                 int flags, int y)
 {
     const int yalpha1=0;
     int i;
@@ -1368,223 +132,6 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
         return;
     }
 
-#if COMPILE_TEMPLATE_MMX
-    if(!(flags & SWS_BITEXACT)) {
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-            switch(dstFormat) {
-            case PIX_FMT_RGB32:
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1(%%REGBP, %5)
-                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                } else {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1(%%REGBP, %5)
-                        "pcmpeqd %%mm7, %%mm7                   \n\t"
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                }
-                return;
-            case PIX_FMT_BGR24:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB555:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB565:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-
-                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_YUYV422:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2PACKED1(%%REGBP, %5)
-                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            }
-        } else {
-            switch(dstFormat) {
-            case PIX_FMT_RGB32:
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1b(%%REGBP, %5)
-                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                } else {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1b(%%REGBP, %5)
-                        "pcmpeqd %%mm7, %%mm7                   \n\t"
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                }
-                return;
-            case PIX_FMT_BGR24:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB555:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB565:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-
-                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_YUYV422:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2PACKED1b(%%REGBP, %5)
-                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            }
-        }
-    }
-#endif /* COMPILE_TEMPLATE_MMX */
     if (uvalpha < 2048) {
         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
     } else {
@@ -1594,89 +141,28 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
 
 //FIXME yuy2* can read up to 7 samples too much
 
-static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, long width,
+                             uint32_t *unused)
 {
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(
-        "movq "MANGLE(bm01010101)", %%mm2           \n\t"
-        "mov                    %0, %%"REG_a"       \n\t"
-        "1:                                         \n\t"
-        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
-        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
-        "pand                %%mm2, %%mm0           \n\t"
-        "pand                %%mm2, %%mm1           \n\t"
-        "packuswb            %%mm1, %%mm0           \n\t"
-        "movq                %%mm0, (%2, %%"REG_a") \n\t"
-        "add                    $8, %%"REG_a"       \n\t"
-        " js                    1b                  \n\t"
-        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
-        : "%"REG_a
-    );
-#else
     int i;
     for (i=0; i<width; i++)
         dst[i]= src[2*i];
-#endif
 }
 
-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                              const uint8_t *src2, long width, uint32_t *unused)
 {
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(
-        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
-        "mov                    %0, %%"REG_a"       \n\t"
-        "1:                                         \n\t"
-        "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
-        "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
-        "psrlw                  $8, %%mm0           \n\t"
-        "psrlw                  $8, %%mm1           \n\t"
-        "packuswb            %%mm1, %%mm0           \n\t"
-        "movq                %%mm0, %%mm1           \n\t"
-        "psrlw                  $8, %%mm0           \n\t"
-        "pand                %%mm4, %%mm1           \n\t"
-        "packuswb            %%mm0, %%mm0           \n\t"
-        "packuswb            %%mm1, %%mm1           \n\t"
-        "movd                %%mm0, (%3, %%"REG_a") \n\t"
-        "movd                %%mm1, (%2, %%"REG_a") \n\t"
-        "add                    $4, %%"REG_a"       \n\t"
-        " js                    1b                  \n\t"
-        : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
-        : "%"REG_a
-    );
-#else
     int i;
     for (i=0; i<width; i++) {
         dstU[i]= src1[4*i + 1];
         dstV[i]= src1[4*i + 3];
     }
-#endif
     assert(src1 == src2);
 }
 
-static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                            const uint8_t *src2, long width, uint32_t *unused)
 {
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(
-        "mov                    %0, %%"REG_a"       \n\t"
-        "1:                                         \n\t"
-        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
-        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
-        "movq    (%2, %%"REG_a",2), %%mm2           \n\t"
-        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
-        "psrlw                  $8, %%mm0           \n\t"
-        "psrlw                  $8, %%mm1           \n\t"
-        "psrlw                  $8, %%mm2           \n\t"
-        "psrlw                  $8, %%mm3           \n\t"
-        "packuswb            %%mm1, %%mm0           \n\t"
-        "packuswb            %%mm3, %%mm2           \n\t"
-        "movq                %%mm0, (%3, %%"REG_a") \n\t"
-        "movq                %%mm2, (%4, %%"REG_a") \n\t"
-        "add                    $8, %%"REG_a"       \n\t"
-        " js                    1b                  \n\t"
-        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
-        : "%"REG_a
-    );
-#else
     int i;
     // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
     // we need to skip each second pixel. Same for BEToUV.
@@ -1684,148 +170,47 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
         dstU[i]= src1[2*i + 1];
         dstV[i]= src2[2*i + 1];
     }
-#endif
 }
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, long width,
+                             uint32_t *unused)
 {
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(
-        "mov                  %0, %%"REG_a"         \n\t"
-        "1:                                         \n\t"
-        "movq  (%1, %%"REG_a",2), %%mm0             \n\t"
-        "movq 8(%1, %%"REG_a",2), %%mm1             \n\t"
-        "psrlw                $8, %%mm0             \n\t"
-        "psrlw                $8, %%mm1             \n\t"
-        "packuswb          %%mm1, %%mm0             \n\t"
-        "movq              %%mm0, (%2, %%"REG_a")   \n\t"
-        "add                  $8, %%"REG_a"         \n\t"
-        " js                  1b                    \n\t"
-        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
-        : "%"REG_a
-    );
-#else
     int i;
     for (i=0; i<width; i++)
         dst[i]= src[2*i+1];
-#endif
 }
 
-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                              const uint8_t *src2, long width, uint32_t *unused)
 {
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(
-        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
-        "mov                    %0, %%"REG_a"       \n\t"
-        "1:                                         \n\t"
-        "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
-        "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
-        "pand                %%mm4, %%mm0           \n\t"
-        "pand                %%mm4, %%mm1           \n\t"
-        "packuswb            %%mm1, %%mm0           \n\t"
-        "movq                %%mm0, %%mm1           \n\t"
-        "psrlw                  $8, %%mm0           \n\t"
-        "pand                %%mm4, %%mm1           \n\t"
-        "packuswb            %%mm0, %%mm0           \n\t"
-        "packuswb            %%mm1, %%mm1           \n\t"
-        "movd                %%mm0, (%3, %%"REG_a") \n\t"
-        "movd                %%mm1, (%2, %%"REG_a") \n\t"
-        "add                    $4, %%"REG_a"       \n\t"
-        " js                    1b                  \n\t"
-        : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
-        : "%"REG_a
-    );
-#else
     int i;
     for (i=0; i<width; i++) {
         dstU[i]= src1[4*i + 0];
         dstV[i]= src1[4*i + 2];
     }
-#endif
     assert(src1 == src2);
 }
 
-static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                            const uint8_t *src2, long width, uint32_t *unused)
 {
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(
-        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
-        "mov                    %0, %%"REG_a"       \n\t"
-        "1:                                         \n\t"
-        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
-        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
-        "movq    (%2, %%"REG_a",2), %%mm2           \n\t"
-        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
-        "pand                %%mm4, %%mm0           \n\t"
-        "pand                %%mm4, %%mm1           \n\t"
-        "pand                %%mm4, %%mm2           \n\t"
-        "pand                %%mm4, %%mm3           \n\t"
-        "packuswb            %%mm1, %%mm0           \n\t"
-        "packuswb            %%mm3, %%mm2           \n\t"
-        "movq                %%mm0, (%3, %%"REG_a") \n\t"
-        "movq                %%mm2, (%4, %%"REG_a") \n\t"
-        "add                    $8, %%"REG_a"       \n\t"
-        " js                    1b                  \n\t"
-        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
-        : "%"REG_a
-    );
-#else
     int i;
     for (i=0; i<width; i++) {
         dstU[i]= src1[2*i];
         dstV[i]= src2[2*i];
     }
-#endif
 }
 
-static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
-                                    const uint8_t *src, long width)
+static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
+                              const uint8_t *src, long width)
 {
-#if COMPILE_TEMPLATE_MMX
-    __asm__ volatile(
-        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
-        "mov                    %0, %%"REG_a"       \n\t"
-        "1:                                         \n\t"
-        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
-        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
-        "movq                %%mm0, %%mm2           \n\t"
-        "movq                %%mm1, %%mm3           \n\t"
-        "pand                %%mm4, %%mm0           \n\t"
-        "pand                %%mm4, %%mm1           \n\t"
-        "psrlw                  $8, %%mm2           \n\t"
-        "psrlw                  $8, %%mm3           \n\t"
-        "packuswb            %%mm1, %%mm0           \n\t"
-        "packuswb            %%mm3, %%mm2           \n\t"
-        "movq                %%mm0, (%2, %%"REG_a") \n\t"
-        "movq                %%mm2, (%3, %%"REG_a") \n\t"
-        "add                    $8, %%"REG_a"       \n\t"
-        " js                    1b                  \n\t"
-        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width)
-        : "%"REG_a
-    );
-#else
     int i;
     for (i = 0; i < width; i++) {
         dst1[i] = src[2*i+0];
         dst2[i] = src[2*i+1];
     }
-#endif
-}
-
-static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
-                                    const uint8_t *src1, const uint8_t *src2,
-                                    long width, uint32_t *unused)
-{
-    RENAME(nvXXtoUV)(dstU, dstV, src1, width);
-}
-
-static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
-                                    const uint8_t *src1, const uint8_t *src2,
-                                    long width, uint32_t *unused)
-{
-    RENAME(nvXXtoUV)(dstV, dstU, src1, width);
 }
 
 // FIXME Maybe dither instead.
@@ -1855,124 +240,23 @@ YUV_NBPS(10, LE, AV_RL16)
 YUV_NBPS(10, BE, AV_RB16)
 #endif // YUV_NBPS
 
-#if COMPILE_TEMPLATE_MMX
-static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
+                              const uint8_t *src1, const uint8_t *src2,
+                              long width, uint32_t *unused)
 {
-
-    if(srcFormat == PIX_FMT_BGR24) {
-        __asm__ volatile(
-            "movq  "MANGLE(ff_bgr24toY1Coeff)", %%mm5       \n\t"
-            "movq  "MANGLE(ff_bgr24toY2Coeff)", %%mm6       \n\t"
-            :
-        );
-    } else {
-        __asm__ volatile(
-            "movq  "MANGLE(ff_rgb24toY1Coeff)", %%mm5       \n\t"
-            "movq  "MANGLE(ff_rgb24toY2Coeff)", %%mm6       \n\t"
-            :
-        );
-    }
-
-    __asm__ volatile(
-        "movq  "MANGLE(ff_bgr24toYOffset)", %%mm4   \n\t"
-        "mov                        %2, %%"REG_a"   \n\t"
-        "pxor                    %%mm7, %%mm7       \n\t"
-        "1:                                         \n\t"
-        PREFETCH"               64(%0)              \n\t"
-        "movd                     (%0), %%mm0       \n\t"
-        "movd                    2(%0), %%mm1       \n\t"
-        "movd                    6(%0), %%mm2       \n\t"
-        "movd                    8(%0), %%mm3       \n\t"
-        "add                       $12, %0          \n\t"
-        "punpcklbw               %%mm7, %%mm0       \n\t"
-        "punpcklbw               %%mm7, %%mm1       \n\t"
-        "punpcklbw               %%mm7, %%mm2       \n\t"
-        "punpcklbw               %%mm7, %%mm3       \n\t"
-        "pmaddwd                 %%mm5, %%mm0       \n\t"
-        "pmaddwd                 %%mm6, %%mm1       \n\t"
-        "pmaddwd                 %%mm5, %%mm2       \n\t"
-        "pmaddwd                 %%mm6, %%mm3       \n\t"
-        "paddd                   %%mm1, %%mm0       \n\t"
-        "paddd                   %%mm3, %%mm2       \n\t"
-        "paddd                   %%mm4, %%mm0       \n\t"
-        "paddd                   %%mm4, %%mm2       \n\t"
-        "psrad                     $15, %%mm0       \n\t"
-        "psrad                     $15, %%mm2       \n\t"
-        "packssdw                %%mm2, %%mm0       \n\t"
-        "packuswb                %%mm0, %%mm0       \n\t"
-        "movd                %%mm0, (%1, %%"REG_a") \n\t"
-        "add                        $4, %%"REG_a"   \n\t"
-        " js                        1b              \n\t"
-    : "+r" (src)
-    : "r" (dst+width), "g" ((x86_reg)-width)
-    : "%"REG_a
-    );
+    nvXXtoUV_c(dstU, dstV, src1, width);
 }
 
-static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
+                              const uint8_t *src1, const uint8_t *src2,
+                              long width, uint32_t *unused)
 {
-    __asm__ volatile(
-        "movq                   24(%4), %%mm6       \n\t"
-        "mov                        %3, %%"REG_a"   \n\t"
-        "pxor                    %%mm7, %%mm7       \n\t"
-        "1:                                         \n\t"
-        PREFETCH"               64(%0)              \n\t"
-        "movd                     (%0), %%mm0       \n\t"
-        "movd                    2(%0), %%mm1       \n\t"
-        "punpcklbw               %%mm7, %%mm0       \n\t"
-        "punpcklbw               %%mm7, %%mm1       \n\t"
-        "movq                    %%mm0, %%mm2       \n\t"
-        "movq                    %%mm1, %%mm3       \n\t"
-        "pmaddwd                  (%4), %%mm0       \n\t"
-        "pmaddwd                 8(%4), %%mm1       \n\t"
-        "pmaddwd                16(%4), %%mm2       \n\t"
-        "pmaddwd                 %%mm6, %%mm3       \n\t"
-        "paddd                   %%mm1, %%mm0       \n\t"
-        "paddd                   %%mm3, %%mm2       \n\t"
-
-        "movd                    6(%0), %%mm1       \n\t"
-        "movd                    8(%0), %%mm3       \n\t"
-        "add                       $12, %0          \n\t"
-        "punpcklbw               %%mm7, %%mm1       \n\t"
-        "punpcklbw               %%mm7, %%mm3       \n\t"
-        "movq                    %%mm1, %%mm4       \n\t"
-        "movq                    %%mm3, %%mm5       \n\t"
-        "pmaddwd                  (%4), %%mm1       \n\t"
-        "pmaddwd                 8(%4), %%mm3       \n\t"
-        "pmaddwd                16(%4), %%mm4       \n\t"
-        "pmaddwd                 %%mm6, %%mm5       \n\t"
-        "paddd                   %%mm3, %%mm1       \n\t"
-        "paddd                   %%mm5, %%mm4       \n\t"
-
-        "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3       \n\t"
-        "paddd                   %%mm3, %%mm0       \n\t"
-        "paddd                   %%mm3, %%mm2       \n\t"
-        "paddd                   %%mm3, %%mm1       \n\t"
-        "paddd                   %%mm3, %%mm4       \n\t"
-        "psrad                     $15, %%mm0       \n\t"
-        "psrad                     $15, %%mm2       \n\t"
-        "psrad                     $15, %%mm1       \n\t"
-        "psrad                     $15, %%mm4       \n\t"
-        "packssdw                %%mm1, %%mm0       \n\t"
-        "packssdw                %%mm4, %%mm2       \n\t"
-        "packuswb                %%mm0, %%mm0       \n\t"
-        "packuswb                %%mm2, %%mm2       \n\t"
-        "movd                %%mm0, (%1, %%"REG_a") \n\t"
-        "movd                %%mm2, (%2, %%"REG_a") \n\t"
-        "add                        $4, %%"REG_a"   \n\t"
-        " js                        1b              \n\t"
-    : "+r" (src)
-    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
-    : "%"REG_a
-    );
+    nvXXtoUV_c(dstV, dstU, src1, width);
 }
-#endif
 
-static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
+                              long width, uint32_t *unused)
 {
-#if COMPILE_TEMPLATE_MMX
-    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
-#else
     int i;
     for (i=0; i<width; i++) {
         int b= src[i*3+0];
@@ -1981,14 +265,11 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width
 
         dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
     }
-#endif /* COMPILE_TEMPLATE_MMX */
 }
 
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                               const uint8_t *src2, long width, uint32_t *unused)
 {
-#if COMPILE_TEMPLATE_MMX
-    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
-#else
     int i;
     for (i=0; i<width; i++) {
         int b= src1[3*i + 0];
@@ -1998,11 +279,11 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
         dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
         dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
     }
-#endif /* COMPILE_TEMPLATE_MMX */
     assert(src1 == src2);
 }
 
-static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                                    const uint8_t *src2, long width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -2016,11 +297,9 @@ static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const ui
     assert(src1 == src2);
 }
 
-static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
+                              uint32_t *unused)
 {
-#if COMPILE_TEMPLATE_MMX
-    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
-#else
     int i;
     for (i=0; i<width; i++) {
         int r= src[i*3+0];
@@ -2029,15 +308,11 @@ static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width
 
         dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
     }
-#endif
 }
 
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                               const uint8_t *src2, long width, uint32_t *unused)
 {
-#if COMPILE_TEMPLATE_MMX
-    assert(src1==src2);
-    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
-#else
     int i;
     assert(src1==src2);
     for (i=0; i<width; i++) {
@@ -2048,10 +323,10 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
         dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
         dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
     }
-#endif
 }
 
-static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                                    const uint8_t *src2, long width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -2067,163 +342,11 @@ static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const ui
 
 
 // bilinear / bicubic scaling
-static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
-                                  const int16_t *filter, const int16_t *filterPos, long filterSize)
+static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
+                            int srcW, int xInc,
+                            const int16_t *filter, const int16_t *filterPos,
+                            long filterSize)
 {
-#if COMPILE_TEMPLATE_MMX
-    assert(filterSize % 4 == 0 && filterSize>0);
-    if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
-        x86_reg counter= -2*dstW;
-        filter-= counter*2;
-        filterPos-= counter/2;
-        dst-= counter/2;
-        __asm__ volatile(
-#if defined(PIC)
-            "push            %%"REG_b"              \n\t"
-#endif
-            "pxor                %%mm7, %%mm7       \n\t"
-            "push           %%"REG_BP"              \n\t" // we use 7 regs here ...
-            "mov             %%"REG_a", %%"REG_BP"  \n\t"
-            ".p2align                4              \n\t"
-            "1:                                     \n\t"
-            "movzwl   (%2, %%"REG_BP"), %%eax       \n\t"
-            "movzwl  2(%2, %%"REG_BP"), %%ebx       \n\t"
-            "movq  (%1, %%"REG_BP", 4), %%mm1       \n\t"
-            "movq 8(%1, %%"REG_BP", 4), %%mm3       \n\t"
-            "movd      (%3, %%"REG_a"), %%mm0       \n\t"
-            "movd      (%3, %%"REG_b"), %%mm2       \n\t"
-            "punpcklbw           %%mm7, %%mm0       \n\t"
-            "punpcklbw           %%mm7, %%mm2       \n\t"
-            "pmaddwd             %%mm1, %%mm0       \n\t"
-            "pmaddwd             %%mm2, %%mm3       \n\t"
-            "movq                %%mm0, %%mm4       \n\t"
-            "punpckldq           %%mm3, %%mm0       \n\t"
-            "punpckhdq           %%mm3, %%mm4       \n\t"
-            "paddd               %%mm4, %%mm0       \n\t"
-            "psrad                  $7, %%mm0       \n\t"
-            "packssdw            %%mm0, %%mm0       \n\t"
-            "movd                %%mm0, (%4, %%"REG_BP")    \n\t"
-            "add                    $4, %%"REG_BP"  \n\t"
-            " jnc                   1b              \n\t"
-
-            "pop            %%"REG_BP"              \n\t"
-#if defined(PIC)
-            "pop             %%"REG_b"              \n\t"
-#endif
-            : "+a" (counter)
-            : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
-#if !defined(PIC)
-            : "%"REG_b
-#endif
-        );
-    } else if (filterSize==8) {
-        x86_reg counter= -2*dstW;
-        filter-= counter*4;
-        filterPos-= counter/2;
-        dst-= counter/2;
-        __asm__ volatile(
-#if defined(PIC)
-            "push             %%"REG_b"             \n\t"
-#endif
-            "pxor                 %%mm7, %%mm7      \n\t"
-            "push            %%"REG_BP"             \n\t" // we use 7 regs here ...
-            "mov              %%"REG_a", %%"REG_BP" \n\t"
-            ".p2align                 4             \n\t"
-            "1:                                     \n\t"
-            "movzwl    (%2, %%"REG_BP"), %%eax      \n\t"
-            "movzwl   2(%2, %%"REG_BP"), %%ebx      \n\t"
-            "movq   (%1, %%"REG_BP", 8), %%mm1      \n\t"
-            "movq 16(%1, %%"REG_BP", 8), %%mm3      \n\t"
-            "movd       (%3, %%"REG_a"), %%mm0      \n\t"
-            "movd       (%3, %%"REG_b"), %%mm2      \n\t"
-            "punpcklbw            %%mm7, %%mm0      \n\t"
-            "punpcklbw            %%mm7, %%mm2      \n\t"
-            "pmaddwd              %%mm1, %%mm0      \n\t"
-            "pmaddwd              %%mm2, %%mm3      \n\t"
-
-            "movq  8(%1, %%"REG_BP", 8), %%mm1      \n\t"
-            "movq 24(%1, %%"REG_BP", 8), %%mm5      \n\t"
-            "movd      4(%3, %%"REG_a"), %%mm4      \n\t"
-            "movd      4(%3, %%"REG_b"), %%mm2      \n\t"
-            "punpcklbw            %%mm7, %%mm4      \n\t"
-            "punpcklbw            %%mm7, %%mm2      \n\t"
-            "pmaddwd              %%mm1, %%mm4      \n\t"
-            "pmaddwd              %%mm2, %%mm5      \n\t"
-            "paddd                %%mm4, %%mm0      \n\t"
-            "paddd                %%mm5, %%mm3      \n\t"
-            "movq                 %%mm0, %%mm4      \n\t"
-            "punpckldq            %%mm3, %%mm0      \n\t"
-            "punpckhdq            %%mm3, %%mm4      \n\t"
-            "paddd                %%mm4, %%mm0      \n\t"
-            "psrad                   $7, %%mm0      \n\t"
-            "packssdw             %%mm0, %%mm0      \n\t"
-            "movd                 %%mm0, (%4, %%"REG_BP")   \n\t"
-            "add                     $4, %%"REG_BP" \n\t"
-            " jnc                    1b             \n\t"
-
-            "pop             %%"REG_BP"             \n\t"
-#if defined(PIC)
-            "pop              %%"REG_b"             \n\t"
-#endif
-            : "+a" (counter)
-            : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
-#if !defined(PIC)
-            : "%"REG_b
-#endif
-        );
-    } else {
-        const uint8_t *offset = src+filterSize;
-        x86_reg counter= -2*dstW;
-        //filter-= counter*filterSize/2;
-        filterPos-= counter/2;
-        dst-= counter/2;
-        __asm__ volatile(
-            "pxor                  %%mm7, %%mm7     \n\t"
-            ".p2align                  4            \n\t"
-            "1:                                     \n\t"
-            "mov                      %2, %%"REG_c" \n\t"
-            "movzwl      (%%"REG_c", %0), %%eax     \n\t"
-            "movzwl     2(%%"REG_c", %0), %%edx     \n\t"
-            "mov                      %5, %%"REG_c" \n\t"
-            "pxor                  %%mm4, %%mm4     \n\t"
-            "pxor                  %%mm5, %%mm5     \n\t"
-            "2:                                     \n\t"
-            "movq                   (%1), %%mm1     \n\t"
-            "movq               (%1, %6), %%mm3     \n\t"
-            "movd (%%"REG_c", %%"REG_a"), %%mm0     \n\t"
-            "movd (%%"REG_c", %%"REG_d"), %%mm2     \n\t"
-            "punpcklbw             %%mm7, %%mm0     \n\t"
-            "punpcklbw             %%mm7, %%mm2     \n\t"
-            "pmaddwd               %%mm1, %%mm0     \n\t"
-            "pmaddwd               %%mm2, %%mm3     \n\t"
-            "paddd                 %%mm3, %%mm5     \n\t"
-            "paddd                 %%mm0, %%mm4     \n\t"
-            "add                      $8, %1        \n\t"
-            "add                      $4, %%"REG_c" \n\t"
-            "cmp                      %4, %%"REG_c" \n\t"
-            " jb                      2b            \n\t"
-            "add                      %6, %1        \n\t"
-            "movq                  %%mm4, %%mm0     \n\t"
-            "punpckldq             %%mm5, %%mm4     \n\t"
-            "punpckhdq             %%mm5, %%mm0     \n\t"
-            "paddd                 %%mm0, %%mm4     \n\t"
-            "psrad                    $7, %%mm4     \n\t"
-            "packssdw              %%mm4, %%mm4     \n\t"
-            "mov                      %3, %%"REG_a" \n\t"
-            "movd                  %%mm4, (%%"REG_a", %0)   \n\t"
-            "add                      $4, %0        \n\t"
-            " jnc                     1b            \n\t"
-
-            : "+r" (counter), "+r" (filter)
-            : "m" (filterPos), "m" (dst), "m"(offset),
-            "m" (src), "r" ((x86_reg)filterSize*2)
-            : "%"REG_a, "%"REG_c, "%"REG_d
-        );
-    }
-#else
-#if COMPILE_TEMPLATE_ALTIVEC
-    hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
-#else
     int i;
     for (i=0; i<dstW; i++) {
         int j;
@@ -2238,8 +361,6 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
         dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
         //dst[i] = val>>7;
     }
-#endif /* COMPILE_TEMPLATE_ALTIVEC */
-#endif /* COMPILE_MMX */
 }
 
 static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
@@ -2415,7 +536,7 @@ static inline void RENAME(hScale16X)(int16_t *dst, int dstW, const uint16_t *src
 
 //FIXME all pal and rgb srcFormats could do this convertion as well
 //FIXME all scalers more complex than bilinear could do half of this transform
-static void RENAME(chrRangeToJpeg)(int16_t *dst, int width)
+static void chrRangeToJpeg_c(int16_t *dst, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -2423,7 +544,7 @@ static void RENAME(chrRangeToJpeg)(int16_t *dst, int width)
         dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
     }
 }
-static void RENAME(chrRangeFromJpeg)(int16_t *dst, int width)
+static void chrRangeFromJpeg_c(int16_t *dst, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -2431,139 +552,22 @@ static void RENAME(chrRangeFromJpeg)(int16_t *dst, int width)
         dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
     }
 }
-static void RENAME(lumRangeToJpeg)(int16_t *dst, int width)
+static void lumRangeToJpeg_c(int16_t *dst, int width)
 {
     int i;
     for (i = 0; i < width; i++)
         dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
 }
-static void RENAME(lumRangeFromJpeg)(int16_t *dst, int width)
+static void lumRangeFromJpeg_c(int16_t *dst, int width)
 {
     int i;
     for (i = 0; i < width; i++)
         dst[i] = (dst[i]*14071 + 33561947)>>14;
 }
 
-#define FAST_BILINEAR_X86 \
-    "subl    %%edi, %%esi    \n\t" /*  src[xx+1] - src[xx] */                   \
-    "imull   %%ecx, %%esi    \n\t" /* (src[xx+1] - src[xx])*xalpha */           \
-    "shll      $16, %%edi    \n\t"                                              \
-    "addl    %%edi, %%esi    \n\t" /* src[xx+1]*xalpha + src[xx]*(1-xalpha) */  \
-    "mov        %1, %%"REG_D"\n\t"                                              \
-    "shrl       $9, %%esi    \n\t"                                              \
-
-static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
-                                        long dstWidth, const uint8_t *src, int srcW,
-                                        int xInc)
+static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, long dstWidth,
+                                  const uint8_t *src, int srcW, int xInc)
 {
-#if ARCH_X86
-#if COMPILE_TEMPLATE_MMX2
-    int32_t *filterPos = c->hLumFilterPos;
-    int16_t *filter    = c->hLumFilter;
-    int     canMMX2BeUsed  = c->canMMX2BeUsed;
-    void    *mmx2FilterCode= c->lumMmx2FilterCode;
-    int i;
-#if defined(PIC)
-    DECLARE_ALIGNED(8, uint64_t, ebxsave);
-#endif
-    if (canMMX2BeUsed) {
-        __asm__ volatile(
-#if defined(PIC)
-            "mov               %%"REG_b", %5        \n\t"
-#endif
-            "pxor                  %%mm7, %%mm7     \n\t"
-            "mov                      %0, %%"REG_c" \n\t"
-            "mov                      %1, %%"REG_D" \n\t"
-            "mov                      %2, %%"REG_d" \n\t"
-            "mov                      %3, %%"REG_b" \n\t"
-            "xor               %%"REG_a", %%"REG_a" \n\t" // i
-            PREFETCH"        (%%"REG_c")            \n\t"
-            PREFETCH"      32(%%"REG_c")            \n\t"
-            PREFETCH"      64(%%"REG_c")            \n\t"
-
-#if ARCH_X86_64
-
-#define CALL_MMX2_FILTER_CODE \
-            "movl            (%%"REG_b"), %%esi     \n\t"\
-            "call                    *%4            \n\t"\
-            "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\
-            "add               %%"REG_S", %%"REG_c" \n\t"\
-            "add               %%"REG_a", %%"REG_D" \n\t"\
-            "xor               %%"REG_a", %%"REG_a" \n\t"\
-
-#else
-
-#define CALL_MMX2_FILTER_CODE \
-            "movl (%%"REG_b"), %%esi        \n\t"\
-            "call         *%4                       \n\t"\
-            "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
-            "add               %%"REG_a", %%"REG_D" \n\t"\
-            "xor               %%"REG_a", %%"REG_a" \n\t"\
-
-#endif /* ARCH_X86_64 */
-
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-
-#if defined(PIC)
-            "mov                      %5, %%"REG_b" \n\t"
-#endif
-            :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
-            "m" (mmx2FilterCode)
-#if defined(PIC)
-            ,"m" (ebxsave)
-#endif
-            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
-#if !defined(PIC)
-            ,"%"REG_b
-#endif
-        );
-        for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
-    } else {
-#endif /* COMPILE_TEMPLATE_MMX2 */
-    x86_reg xInc_shr16 = xInc >> 16;
-    uint16_t xInc_mask = xInc & 0xffff;
-    x86_reg dstWidth_reg = dstWidth;
-    //NO MMX just normal asm ...
-    __asm__ volatile(
-        "xor %%"REG_a", %%"REG_a"            \n\t" // i
-        "xor %%"REG_d", %%"REG_d"            \n\t" // xx
-        "xorl    %%ecx, %%ecx                \n\t" // xalpha
-        ".p2align    4                       \n\t"
-        "1:                                  \n\t"
-        "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
-        "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
-        FAST_BILINEAR_X86
-        "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
-        "addw       %4, %%cx                 \n\t" //xalpha += xInc&0xFFFF
-        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
-
-        "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
-        "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
-        FAST_BILINEAR_X86
-        "movw     %%si, 2(%%"REG_D", %%"REG_a", 2)  \n\t"
-        "addw       %4, %%cx                 \n\t" //xalpha += xInc&0xFFFF
-        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
-
-
-        "add        $2, %%"REG_a"            \n\t"
-        "cmp        %2, %%"REG_a"            \n\t"
-        " jb        1b                       \n\t"
-
-
-        :: "r" (src), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask)
-        : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
-    );
-#if COMPILE_TEMPLATE_MMX2
-    } //if MMX2 can't be used
-#endif
-#else
     int i;
     unsigned int xpos=0;
     for (i=0;i<dstWidth;i++) {
@@ -2572,15 +576,15 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
         dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
         xpos+=xInc;
     }
-#endif /* ARCH_X86 */
 }
 
       // *** horizontal scale Y line to temp buffer
-static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
-                                   const int16_t *hLumFilter,
-                                   const int16_t *hLumFilterPos, int hLumFilterSize,
-                                   uint8_t *formatConvBuffer,
-                                   uint32_t *pal, int isAlpha)
+static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
+                             const uint8_t *src, int srcW, int xInc,
+                             const int16_t *hLumFilter,
+                             const int16_t *hLumFilterPos, int hLumFilterSize,
+                             uint8_t *formatConvBuffer,
+                             uint32_t *pal, int isAlpha)
 {
     void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
@@ -2604,112 +608,10 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth,
         convertRange(dst, dstWidth);
 }
 
-static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
-                                        long dstWidth, const uint8_t *src1,
-                                        const uint8_t *src2, int srcW, int xInc)
+static inline void hcscale_fast_c(SwsContext *c, int16_t *dst,
+                                  long dstWidth, const uint8_t *src1,
+                                  const uint8_t *src2, int srcW, int xInc)
 {
-#if ARCH_X86
-#if COMPILE_TEMPLATE_MMX2
-    int32_t *filterPos = c->hChrFilterPos;
-    int16_t *filter    = c->hChrFilter;
-    int     canMMX2BeUsed  = c->canMMX2BeUsed;
-    void    *mmx2FilterCode= c->chrMmx2FilterCode;
-    int i;
-#if defined(PIC)
-    DECLARE_ALIGNED(8, uint64_t, ebxsave);
-#endif
-    if (canMMX2BeUsed) {
-        __asm__ volatile(
-#if defined(PIC)
-            "mov          %%"REG_b", %6         \n\t"
-#endif
-            "pxor             %%mm7, %%mm7      \n\t"
-            "mov                 %0, %%"REG_c"  \n\t"
-            "mov                 %1, %%"REG_D"  \n\t"
-            "mov                 %2, %%"REG_d"  \n\t"
-            "mov                 %3, %%"REG_b"  \n\t"
-            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-            PREFETCH"   (%%"REG_c")             \n\t"
-            PREFETCH" 32(%%"REG_c")             \n\t"
-            PREFETCH" 64(%%"REG_c")             \n\t"
-
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-            "mov                 %5, %%"REG_c"  \n\t" // src
-            "mov                 %1, %%"REG_D"  \n\t" // buf1
-            "add              $"AV_STRINGIFY(VOF)", %%"REG_D"  \n\t"
-            PREFETCH"   (%%"REG_c")             \n\t"
-            PREFETCH" 32(%%"REG_c")             \n\t"
-            PREFETCH" 64(%%"REG_c")             \n\t"
-
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-
-#if defined(PIC)
-            "mov %6, %%"REG_b"    \n\t"
-#endif
-            :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
-            "m" (mmx2FilterCode), "m" (src2)
-#if defined(PIC)
-            ,"m" (ebxsave)
-#endif
-            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
-#if !defined(PIC)
-            ,"%"REG_b
-#endif
-        );
-        for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
-            //printf("%d %d %d\n", dstWidth, i, srcW);
-            dst[i] = src1[srcW-1]*128;
-            dst[i+VOFW] = src2[srcW-1]*128;
-        }
-    } else {
-#endif /* COMPILE_TEMPLATE_MMX2 */
-        x86_reg xInc_shr16 = (x86_reg) (xInc >> 16);
-        uint16_t xInc_mask = xInc & 0xffff;
-        x86_reg dstWidth_reg = dstWidth;
-        __asm__ volatile(
-            "xor %%"REG_a", %%"REG_a"               \n\t" // i
-            "xor %%"REG_d", %%"REG_d"               \n\t" // xx
-            "xorl    %%ecx, %%ecx                   \n\t" // xalpha
-            ".p2align    4                          \n\t"
-            "1:                                     \n\t"
-            "mov        %0, %%"REG_S"               \n\t"
-            "movzbl  (%%"REG_S", %%"REG_d"), %%edi  \n\t" //src[xx]
-            "movzbl 1(%%"REG_S", %%"REG_d"), %%esi  \n\t" //src[xx+1]
-            FAST_BILINEAR_X86
-            "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
-
-            "movzbl    (%5, %%"REG_d"), %%edi       \n\t" //src[xx]
-            "movzbl   1(%5, %%"REG_d"), %%esi       \n\t" //src[xx+1]
-            FAST_BILINEAR_X86
-            "movw     %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2)   \n\t"
-
-            "addw       %4, %%cx                    \n\t" //xalpha += xInc&0xFFFF
-            "adc        %3, %%"REG_d"               \n\t" //xx+= xInc>>16 + carry
-            "add        $1, %%"REG_a"               \n\t"
-            "cmp        %2, %%"REG_a"               \n\t"
-            " jb        1b                          \n\t"
-
-/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
-which is needed to support GCC 4.0. */
-#if ARCH_X86_64 && AV_GCC_VERSION_AT_LEAST(3,4)
-            :: "m" (src1), "m" (dst), "g" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask),
-#else
-            :: "m" (src1), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask),
-#endif
-            "r" (src2)
-            : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
-        );
-#if COMPILE_TEMPLATE_MMX2
-    } //if MMX2 can't be used
-#endif
-#else
     int i;
     unsigned int xpos=0;
     for (i=0;i<dstWidth;i++) {
@@ -2723,14 +625,13 @@ which is needed to support GCC 4.0. */
         */
         xpos+=xInc;
     }
-#endif /* ARCH_X86 */
 }
 
-inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
-                                   int srcW, int xInc, const int16_t *hChrFilter,
-                                   const int16_t *hChrFilterPos, int hChrFilterSize,
-                                   uint8_t *formatConvBuffer,
-                                   uint32_t *pal)
+inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int srcW, int xInc, const int16_t *hChrFilter,
+                             const int16_t *hChrFilterPos, int hChrFilterSize,
+                             uint8_t *formatConvBuffer, uint32_t *pal)
 {
 
     src1 += c->chrSrcOffset;
@@ -2759,8 +660,8 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
 #define DEBUG_SWSCALE_BUFFERS 0
 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
 
-static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                           int srcSliceH, uint8_t* dst[], int dstStride[])
+static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
+                     int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
 {
     /* load a few things into local vars to make the code more readable? and faster */
     const int srcW= c->srcW;
@@ -2893,15 +794,15 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
             assert(lumBufIndex < 2*vLumBufSize);
             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
             assert(lastInLumBuf + 1 - srcSliceY >= 0);
-            RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
-                            hLumFilter, hLumFilterPos, hLumFilterSize,
-                            formatConvBuffer,
-                            pal, 0);
+            hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
+                      hLumFilter, hLumFilterPos, hLumFilterSize,
+                      formatConvBuffer,
+                      pal, 0);
             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
-                RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
-                                hLumFilter, hLumFilterPos, hLumFilterSize,
-                                formatConvBuffer,
-                                pal, 1);
+                hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
+                          lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
+                          formatConvBuffer,
+                          pal, 1);
             lastInLumBuf++;
             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                                lumBufIndex,    lastInLumBuf);
@@ -2916,7 +817,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
             //FIXME replace parameters through context struct (some at least)
 
             if (c->needs_hcscale)
-                RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
+                hcscale_c(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
                                 hChrFilter, hChrFilterPos, hChrFilterSize,
                                 formatConvBuffer,
                                 pal);
@@ -2930,65 +831,10 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
         if (!enough_lines)
             break; //we can't output a dstY line so let's try with the next slice
 
-#if COMPILE_TEMPLATE_MMX
-        c->blueDither= ff_dither8[dstY&1];
-        if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
-            c->greenDither= ff_dither8[dstY&1];
-        else
-            c->greenDither= ff_dither4[dstY&1];
-        c->redDither= ff_dither8[(dstY+1)&1];
-#endif
         if (dstY < dstH-2) {
             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
             const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-#if COMPILE_TEMPLATE_MMX
-            int i;
-            if (flags & SWS_ACCURATE_RND) {
-                int s= APCK_SIZE / 8;
-                for (i=0; i<vLumFilterSize; i+=2) {
-                    *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
-                    *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
-                              lumMmxFilter[s*i+APCK_COEF/4  ]=
-                              lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
-                        + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
-                    if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                        *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
-                        *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
-                                  alpMmxFilter[s*i+APCK_COEF/4  ]=
-                                  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
-                    }
-                }
-                for (i=0; i<vChrFilterSize; i+=2) {
-                    *(const void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
-                    *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
-                              chrMmxFilter[s*i+APCK_COEF/4  ]=
-                              chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
-                        + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
-                }
-            } else {
-                for (i=0; i<vLumFilterSize; i++) {
-                    lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
-                    lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
-                    lumMmxFilter[4*i+2]=
-                    lumMmxFilter[4*i+3]=
-                        ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
-                    if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                        alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
-                        alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
-                        alpMmxFilter[4*i+2]=
-                        alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
-                    }
-                }
-                for (i=0; i<vChrFilterSize; i++) {
-                    chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
-                    chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
-                    chrMmxFilter[4*i+2]=
-                    chrMmxFilter[4*i+3]=
-                        ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
-                }
-            }
-#endif
             if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
@@ -3109,12 +955,6 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
 
-#if COMPILE_TEMPLATE_MMX
-    if (flags & SWS_CPU_CAPS_MMX2 )  __asm__ volatile("sfence":::"memory");
-    /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-    if (flags & SWS_CPU_CAPS_3DNOW)  __asm__ volatile("femms" :::"memory");
-    else                             __asm__ volatile("emms"  :::"memory");
-#endif
     /* store changed local vars back in the context */
     c->dstY= dstY;
     c->lumBufIndex= lumBufIndex;
@@ -3125,36 +965,31 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
     return dstY - lastDstY;
 }
 
-static void RENAME(sws_init_swScale)(SwsContext *c)
+static void sws_init_swScale_c(SwsContext *c)
 {
     enum PixelFormat srcFormat = c->srcFormat;
 
-    c->yuv2nv12X    = RENAME(yuv2nv12X   );
-    c->yuv2yuv1     = RENAME(yuv2yuv1    );
-    c->yuv2yuvX     = RENAME(yuv2yuvX    );
-    c->yuv2packed1  = RENAME(yuv2packed1 );
-    c->yuv2packed2  = RENAME(yuv2packed2 );
-    c->yuv2packedX  = RENAME(yuv2packedX );
+    c->yuv2nv12X    = yuv2nv12X_c;
+    c->yuv2yuv1     = yuv2yuv1_c;
+    c->yuv2yuvX     = yuv2yuvX_c;
+    c->yuv2packed1  = yuv2packed1_c;
+    c->yuv2packed2  = yuv2packed2_c;
+    c->yuv2packedX  = yuv2packedX_c;
 
-    c->hScale       = RENAME(hScale      );
+    c->hScale       = hScale_c;
 
-#if COMPILE_TEMPLATE_MMX
-    // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
-    if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
-#else
     if (c->flags & SWS_FAST_BILINEAR)
-#endif
     {
-        c->hyscale_fast = RENAME(hyscale_fast);
-        c->hcscale_fast = RENAME(hcscale_fast);
+        c->hyscale_fast = hyscale_fast_c;
+        c->hcscale_fast = hcscale_fast_c;
     }
 
     c->chrToYV12 = NULL;
     switch(srcFormat) {
-        case PIX_FMT_YUYV422  : c->chrToYV12 = RENAME(yuy2ToUV); break;
-        case PIX_FMT_UYVY422  : c->chrToYV12 = RENAME(uyvyToUV); break;
-        case PIX_FMT_NV12     : c->chrToYV12 = RENAME(nv12ToUV); break;
-        case PIX_FMT_NV21     : c->chrToYV12 = RENAME(nv21ToUV); break;
+        case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
+        case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
+        case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
+        case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
         case PIX_FMT_RGB8     :
         case PIX_FMT_BGR8     :
         case PIX_FMT_PAL8     :
@@ -3183,12 +1018,12 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half;  break;
         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
-        case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV_half); break;
+        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
         case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
         case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
         case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half;  break;
         case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV_half); break;
+        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c; break;
         case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
         case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
         }
@@ -3200,12 +1035,12 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV;  break;
         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
-        case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV); break;
+        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
         case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
         case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
         case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV;  break;
         case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV); break;
+        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c; break;
         case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
         case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
         }
@@ -3216,13 +1051,13 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     switch (srcFormat) {
     case PIX_FMT_YUYV422  :
     case PIX_FMT_GRAY8A   :
-                            c->lumToYV12 = RENAME(yuy2ToY); break;
+                            c->lumToYV12 = yuy2ToY_c; break;
     case PIX_FMT_UYVY422  :
-                            c->lumToYV12 = RENAME(uyvyToY); break;
-    case PIX_FMT_BGR24    : c->lumToYV12 = RENAME(bgr24ToY); break;
+                            c->lumToYV12 = uyvyToY_c; break;
+    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
     case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY; break;
     case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY; break;
-    case PIX_FMT_RGB24    : c->lumToYV12 = RENAME(rgb24ToY); break;
+    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
     case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY; break;
     case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY; break;
     case PIX_FMT_RGB8     :
@@ -3247,7 +1082,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         case PIX_FMT_RGB32_1:
         case PIX_FMT_BGR32  :
         case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
-        case PIX_FMT_GRAY8A : c->alpToYV12 = RENAME(yuy2ToY); break;
+        case PIX_FMT_GRAY8A : c->alpToYV12 = yuy2ToY_c; break;
         case PIX_FMT_PAL8   : c->alpToYV12 = palToA; break;
         }
     }
@@ -3270,11 +1105,11 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
 
     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
         if (c->srcRange) {
-            c->lumConvertRange = RENAME(lumRangeFromJpeg);
-            c->chrConvertRange = RENAME(chrRangeFromJpeg);
+            c->lumConvertRange = lumRangeFromJpeg_c;
+            c->chrConvertRange = chrRangeFromJpeg_c;
         } else {
-            c->lumConvertRange = RENAME(lumRangeToJpeg);
-            c->chrConvertRange = RENAME(chrRangeToJpeg);
+            c->lumConvertRange = lumRangeToJpeg_c;
+            c->chrConvertRange = chrRangeToJpeg_c;
         }
     }
 
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
new file mode 100644
index 0000000000..0f468a7f05
--- /dev/null
+++ b/libswscale/x86/rgb2rgb.c
@@ -0,0 +1,137 @@
+/*
+ * software RGB to RGB converter
+ *         plus software PAL8 to RGB converter
+ *              software YUV to YUV converter
+ *              software YUV to RGB converter
+ * Written by Nick Kurshev.
+ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/x86_cpu.h"
+#include "libavutil/bswap.h"
+#include "libswscale/rgb2rgb.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+
+DECLARE_ASM_CONST(8, uint64_t, mmx_ff)       = 0x00000000000000FFULL;
+DECLARE_ASM_CONST(8, uint64_t, mmx_null)     = 0x0000000000000000ULL;
+DECLARE_ASM_CONST(8, uint64_t, mmx_one)      = 0xFFFFFFFFFFFFFFFFULL;
+DECLARE_ASM_CONST(8, uint64_t, mask32b)      = 0x000000FF000000FFULL;
+DECLARE_ASM_CONST(8, uint64_t, mask32g)      = 0x0000FF000000FF00ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask32r)      = 0x00FF000000FF0000ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask32a)      = 0xFF000000FF000000ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask32)       = 0x00FFFFFF00FFFFFFULL;
+DECLARE_ASM_CONST(8, uint64_t, mask3216br)   = 0x00F800F800F800F8ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask3216g)    = 0x0000FC000000FC00ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask3215g)    = 0x0000F8000000F800ULL;
+DECLARE_ASM_CONST(8, uint64_t, mul3216)      = 0x2000000420000004ULL;
+DECLARE_ASM_CONST(8, uint64_t, mul3215)      = 0x2000000820000008ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24b)      = 0x00FF0000FF0000FFULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24g)      = 0xFF0000FF0000FF00ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24r)      = 0x0000FF0000FF0000ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24l)      = 0x0000000000FFFFFFULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24h)      = 0x0000FFFFFF000000ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24hh)     = 0xffff000000000000ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24hhh)    = 0xffffffff00000000ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24hhhh)   = 0xffffffffffff0000ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask15b)      = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
+DECLARE_ASM_CONST(8, uint64_t, mask15rg)     = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
+DECLARE_ASM_CONST(8, uint64_t, mask15s)      = 0xFFE0FFE0FFE0FFE0ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask15g)      = 0x03E003E003E003E0ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask15r)      = 0x7C007C007C007C00ULL;
+#define mask16b mask15b
+DECLARE_ASM_CONST(8, uint64_t, mask16g)      = 0x07E007E007E007E0ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask16r)      = 0xF800F800F800F800ULL;
+DECLARE_ASM_CONST(8, uint64_t, red_16mask)   = 0x0000f8000000f800ULL;
+DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL;
+DECLARE_ASM_CONST(8, uint64_t, blue_16mask)  = 0x0000001f0000001fULL;
+DECLARE_ASM_CONST(8, uint64_t, red_15mask)   = 0x00007c0000007c00ULL;
+DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
+DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
+
+#define RGB2YUV_SHIFT 8 /* every coefficient below is scaled by 1<<RGB2YUV_SHIFT and converted to int */
+#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) /* +0.5 before the truncating (int) cast */
+#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) /* NOTE(review): for negative values +0.5 then truncation is not round-to-nearest -- confirm intended */
+#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
+#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
+#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
+#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
+#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
+#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
+#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
+
+//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
+
+#define COMPILE_TEMPLATE_MMX2 0 /* start with every optional feature switch off */
+#define COMPILE_TEMPLATE_AMD3DNOW 0
+#define COMPILE_TEMPLATE_SSE2 0
+
+//MMX versions (all COMPILE_TEMPLATE_* switches are 0 for this inclusion)
+#undef RENAME
+#define RENAME(a) a ## _MMX
+#include "rgb2rgb_template.c"
+
+//MMX2 versions (only COMPILE_TEMPLATE_MMX2 is switched to 1)
+#undef RENAME
+#undef COMPILE_TEMPLATE_MMX2
+#define COMPILE_TEMPLATE_MMX2 1
+#define RENAME(a) a ## _MMX2
+#include "rgb2rgb_template.c"
+
+//SSE2 versions (COMPILE_TEMPLATE_MMX2 is still 1 from the section above)
+#undef RENAME
+#undef COMPILE_TEMPLATE_SSE2
+#define COMPILE_TEMPLATE_SSE2 1
+#define RENAME(a) a ## _SSE2
+#include "rgb2rgb_template.c"
+
+//3DNOW versions (rgb2rgb_init_x86 picks these only when neither the SSE2 nor the MMX2 flag is set)
+#undef RENAME
+#undef COMPILE_TEMPLATE_MMX2
+#undef COMPILE_TEMPLATE_SSE2
+#undef COMPILE_TEMPLATE_AMD3DNOW
+#define COMPILE_TEMPLATE_MMX2 0
+#define COMPILE_TEMPLATE_SSE2 0 /* must stay 0: SSE2 code paths and MMREG_SIZE 16 do not belong in the _3DNOW set */
+#define COMPILE_TEMPLATE_AMD3DNOW 1
+#define RENAME(a) a ## _3DNOW
+#include "rgb2rgb_template.c"
+
+/*
+ RGB15->RGB16 original by Strepto/Astral
+ ported to gcc & bugfixed : A'rpi
+ MMX2, 3DNOW optimization by Nick Kurshev
+ 32-bit C version, and and&add trick by Michael Niedermayer
+*/
+
+void rgb2rgb_init_x86(int flags) /* flags: SWS_CPU_CAPS_* bit mask; at most one of the per-variant init routines is called */
+{
+#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
+    if (flags & SWS_CPU_CAPS_SSE2) /* tested first, so it wins over every other flag */
+        rgb2rgb_init_SSE2();
+    else if (flags & SWS_CPU_CAPS_MMX2)
+        rgb2rgb_init_MMX2();
+    else if (flags & SWS_CPU_CAPS_3DNOW) /* only reached when neither SSE2 nor MMX2 is set */
+        rgb2rgb_init_3DNOW();
+    else if (flags & SWS_CPU_CAPS_MMX)
+        rgb2rgb_init_MMX();
+#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
+}
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
new file mode 100644
index 0000000000..c675f24aa0
--- /dev/null
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -0,0 +1,2646 @@
+/*
+ * software RGB to RGB converter
+ *         plus software PAL8 to RGB converter
+ *              software YUV to YUV converter
+ *              software YUV to RGB converter
+ * Written by Nick Kurshev.
+ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
+ * lot of big-endian byte order fixes by Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+
+#undef PREFETCH /* this template is included once per variant, so drop the previous variant's definitions first */
+#undef MOVNTQ
+#undef EMMS
+#undef SFENCE
+#undef MMREG_SIZE
+#undef PAVGB
+
+#if COMPILE_TEMPLATE_SSE2
+#define MMREG_SIZE 16 /* bytes per SIMD register for the SSE2 variant */
+#else
+#define MMREG_SIZE 8
+#endif
+
+#if COMPILE_TEMPLATE_AMD3DNOW
+#define PREFETCH  "prefetch"
+#define PAVGB     "pavgusb"
+#elif COMPILE_TEMPLATE_MMX2
+#define PREFETCH "prefetchnta"
+#define PAVGB     "pavgb"
+#else
+#define PREFETCH  " # nop" /* presumably an assembler comment, i.e. no instruction; PAVGB stays undefined in this variant */
+#endif
+
+#if COMPILE_TEMPLATE_AMD3DNOW
+/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
+#define EMMS     "femms"
+#else
+#define EMMS     "emms"
+#endif
+
+#if COMPILE_TEMPLATE_MMX2
+#define MOVNTQ "movntq"
+#define SFENCE "sfence"
+#else
+#define MOVNTQ "movq" /* fallback: ordinary 64-bit store, and SFENCE below expands to no instruction */
+#define SFENCE " # nop"
+#endif
+
+static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) /* expands each 3-byte source pixel to 4 bytes; src_size is in bytes */
+{
+    uint8_t *dest = dst;
+    const uint8_t *s = src;
+    const uint8_t *end;
+    const uint8_t *mm_end;
+    end = s + src_size;
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    mm_end = end - 23; /* the asm loop runs only while at least 24 source bytes remain */
+    __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory"); /* mm7 = mask32a, OR-ed into every output quadword below */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t" /* four register pairs: pixel k at byte 3*k goes into one 32-bit lane */
+            "punpckldq    3%1, %%mm0    \n\t"
+            "movd         6%1, %%mm1    \n\t"
+            "punpckldq    9%1, %%mm1    \n\t"
+            "movd        12%1, %%mm2    \n\t"
+            "punpckldq   15%1, %%mm2    \n\t"
+            "movd        18%1, %%mm3    \n\t"
+            "punpckldq   21%1, %%mm3    \n\t" /* NOTE(review): a 4-byte load at offset 21 touches byte 24, one past this 24-byte group -- confirm callers pad src */
+            "por        %%mm7, %%mm0    \n\t"
+            "por        %%mm7, %%mm1    \n\t"
+            "por        %%mm7, %%mm2    \n\t"
+            "por        %%mm7, %%mm3    \n\t"
+            MOVNTQ"     %%mm0,   %0     \n\t"
+            MOVNTQ"     %%mm1,  8%0     \n\t"
+            MOVNTQ"     %%mm2, 16%0     \n\t"
+            MOVNTQ"     %%mm3, 24%0"
+            :"=m"(*dest)
+            :"m"(*s)
+            :"memory");
+        dest += 32; /* 8 pixels per pass: 24 bytes in, 32 bytes out */
+        s += 24;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail for the bytes the asm loop did not consume */
+        *dest++ = *s++;
+        *dest++ = *s++;
+        *dest++ = *s++;
+        *dest++ = 255; /* fourth output byte is always 255 */
+    }
+}
+
+#define STORE_BGR24_MMX /* asm fragment: expects mm0..mm7 preloaded by the user of the macro; ends with three 8-byte stores at %0, 8%0, 16%0 */ \
+            "psrlq         $8, %%mm2    \n\t" \
+            "psrlq         $8, %%mm3    \n\t" \
+            "psrlq         $8, %%mm6    \n\t" \
+            "psrlq         $8, %%mm7    \n\t" \
+            "pand "MANGLE(mask24l)", %%mm0\n\t" \
+            "pand "MANGLE(mask24l)", %%mm1\n\t" \
+            "pand "MANGLE(mask24l)", %%mm4\n\t" \
+            "pand "MANGLE(mask24l)", %%mm5\n\t" \
+            "pand "MANGLE(mask24h)", %%mm2\n\t" \
+            "pand "MANGLE(mask24h)", %%mm3\n\t" \
+            "pand "MANGLE(mask24h)", %%mm6\n\t" \
+            "pand "MANGLE(mask24h)", %%mm7\n\t" \
+            "por        %%mm2, %%mm0    \n\t" \
+            "por        %%mm3, %%mm1    \n\t" \
+            "por        %%mm6, %%mm4    \n\t" \
+            "por        %%mm7, %%mm5    \n\t" \
+ \
+            "movq       %%mm1, %%mm2    \n\t" \
+            "movq       %%mm4, %%mm3    \n\t" \
+            "psllq        $48, %%mm2    \n\t" \
+            "psllq        $32, %%mm3    \n\t" \
+            "pand "MANGLE(mask24hh)", %%mm2\n\t" \
+            "pand "MANGLE(mask24hhh)", %%mm3\n\t" \
+            "por        %%mm2, %%mm0    \n\t" \
+            "psrlq        $16, %%mm1    \n\t" \
+            "psrlq        $32, %%mm4    \n\t" \
+            "psllq        $16, %%mm5    \n\t" \
+            "por        %%mm3, %%mm1    \n\t" \
+            "pand  "MANGLE(mask24hhhh)", %%mm5\n\t" \
+            "por        %%mm5, %%mm4    \n\t" \
+ \
+            MOVNTQ"     %%mm0,   %0     \n\t" \
+            MOVNTQ"     %%mm1,  8%0     \n\t" \
+            MOVNTQ"     %%mm4, 16%0"
+
+
+static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) /* keeps the first 3 bytes of every 4-byte source pixel; src_size is in bytes */
+{
+    uint8_t *dest = dst;
+    const uint8_t *s = src;
+    const uint8_t *end;
+    const uint8_t *mm_end;
+    end = s + src_size;
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    mm_end = end - 31; /* the asm loop runs only while at least 32 source bytes remain */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movq          %1, %%mm0    \n\t" /* load 32 source bytes into mm0, mm1, mm4, mm5 */
+            "movq         8%1, %%mm1    \n\t"
+            "movq        16%1, %%mm4    \n\t"
+            "movq        24%1, %%mm5    \n\t"
+            "movq       %%mm0, %%mm2    \n\t" /* duplicate them into mm2, mm3, mm6, mm7 for STORE_BGR24_MMX */
+            "movq       %%mm1, %%mm3    \n\t"
+            "movq       %%mm4, %%mm6    \n\t"
+            "movq       %%mm5, %%mm7    \n\t"
+            STORE_BGR24_MMX
+            :"=m"(*dest)
+            :"m"(*s)
+            :"memory");
+        dest += 24; /* 8 pixels per pass: 32 bytes in, 24 bytes out */
+        s += 32;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail for the bytes the asm loop did not consume */
+        *dest++ = *s++;
+        *dest++ = *s++;
+        *dest++ = *s++;
+        s++; /* skip the fourth source byte */
+    }
+}
+
+/*
+ original by Strepto/Astral
+ ported to gcc & bugfixed: A'rpi
+ MMX2, 3DNOW optimization by Nick Kurshev
+ 32-bit C version, and and&add trick by Michael Niedermayer
+*/
+static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size) /* per 16-bit word: bits 5 and up move one position left, bits 0-4 stay; src_size is in bytes */
+{
+    register const uint8_t* s=src;
+    register uint8_t* d=dst;
+    register const uint8_t *end;
+    const uint8_t *mm_end;
+    end = s + src_size;
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
+    __asm__ volatile("movq        %0, %%mm4"::"m"(mask15s)); /* mm4 = mask15s (0xFFE0 in each 16-bit word) */
+    mm_end = end - 15; /* the asm loop runs only while at least 16 source bytes remain */
+    while (s<mm_end) {
+        __asm__ volatile(
+            PREFETCH"  32%1         \n\t"
+            "movq        %1, %%mm0  \n\t"
+            "movq       8%1, %%mm2  \n\t"
+            "movq     %%mm0, %%mm1  \n\t"
+            "movq     %%mm2, %%mm3  \n\t"
+            "pand     %%mm4, %%mm0  \n\t" /* keep only the bits above the low five ... */
+            "pand     %%mm4, %%mm2  \n\t"
+            "paddw    %%mm1, %%mm0  \n\t" /* ... and add them to the original word, which doubles (left-shifts) them */
+            "paddw    %%mm3, %%mm2  \n\t"
+            MOVNTQ"   %%mm0,  %0    \n\t"
+            MOVNTQ"   %%mm2, 8%0"
+            :"=m"(*d)
+            :"m"(*s)
+        );
+        d+=16;
+        s+=16;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    mm_end = end - 3; /* scalar tail, two 16-bit words at a time */
+    while (s < mm_end) {
+        register unsigned x= *((const uint32_t *)s);
+        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); /* same add trick as the asm; bit 15 of each word is masked off first */
+        d+=4;
+        s+=4;
+    }
+    if (s < end) { /* at most one 16-bit word can be left */
+        register unsigned short x= *((const uint16_t *)s);
+        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
+    }
+}
+
+static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size) /* per 16-bit word: upper fields move one bit right, bits 0-4 stay; src_size is in bytes */
+{
+    register const uint8_t* s=src;
+    register uint8_t* d=dst;
+    register const uint8_t *end;
+    const uint8_t *mm_end;
+    end = s + src_size;
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
+    __asm__ volatile("movq        %0, %%mm7"::"m"(mask15rg)); /* mm7 = mask15rg (0x7FE0 per word) */
+    __asm__ volatile("movq        %0, %%mm6"::"m"(mask15b)); /* mm6 = mask15b (0x001F per word) */
+    mm_end = end - 15; /* the asm loop runs only while at least 16 source bytes remain */
+    while (s<mm_end) {
+        __asm__ volatile(
+            PREFETCH"  32%1         \n\t"
+            "movq        %1, %%mm0  \n\t"
+            "movq       8%1, %%mm2  \n\t"
+            "movq     %%mm0, %%mm1  \n\t"
+            "movq     %%mm2, %%mm3  \n\t"
+            "psrlq       $1, %%mm0  \n\t" /* shift the whole quadword right by one ... */
+            "psrlq       $1, %%mm2  \n\t"
+            "pand     %%mm7, %%mm0  \n\t" /* ... then mask, which also drops bits that crossed a word boundary */
+            "pand     %%mm7, %%mm2  \n\t"
+            "pand     %%mm6, %%mm1  \n\t" /* low five bits taken from the unshifted copy */
+            "pand     %%mm6, %%mm3  \n\t"
+            "por      %%mm1, %%mm0  \n\t"
+            "por      %%mm3, %%mm2  \n\t"
+            MOVNTQ"   %%mm0,  %0    \n\t"
+            MOVNTQ"   %%mm2, 8%0"
+            :"=m"(*d)
+            :"m"(*s)
+        );
+        d+=16;
+        s+=16;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    mm_end = end - 3; /* scalar tail, two 16-bit words at a time */
+    while (s < mm_end) {
+        register uint32_t x= *((const uint32_t*)s);
+        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
+        s+=4;
+        d+=4;
+    }
+    if (s < end) { /* at most one 16-bit word can be left */
+        register uint16_t x= *((const uint16_t*)s);
+        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
+    }
+}
+
+static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size) /* packs each 32-bit source pixel into one 16-bit value (5/6/5 bits); src_size is in bytes */
+{
+    const uint8_t *s = src;
+    const uint8_t *end;
+    const uint8_t *mm_end;
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+    mm_end = end - 15; /* the asm loop runs only while at least 16 source bytes (4 pixels) remain */
+#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
+    __asm__ volatile(
+        "movq           %3, %%mm5   \n\t" /* mm5 = mask3216g */
+        "movq           %4, %%mm6   \n\t" /* mm6 = mask3216br */
+        "movq           %5, %%mm7   \n\t" /* mm7 = mul3216 */
+        "jmp 2f                     \n\t" /* enter at the loop test so zero iterations are possible */
+        ".p2align        4          \n\t"
+        "1:                         \n\t"
+        PREFETCH"   32(%1)          \n\t"
+        "movd         (%1), %%mm0   \n\t"
+        "movd        4(%1), %%mm3   \n\t"
+        "punpckldq   8(%1), %%mm0   \n\t"
+        "punpckldq  12(%1), %%mm3   \n\t"
+        "movq        %%mm0, %%mm1   \n\t"
+        "movq        %%mm3, %%mm4   \n\t"
+        "pand        %%mm6, %%mm0   \n\t"
+        "pand        %%mm6, %%mm3   \n\t"
+        "pmaddwd     %%mm7, %%mm0   \n\t"
+        "pmaddwd     %%mm7, %%mm3   \n\t"
+        "pand        %%mm5, %%mm1   \n\t"
+        "pand        %%mm5, %%mm4   \n\t"
+        "por         %%mm1, %%mm0   \n\t"
+        "por         %%mm4, %%mm3   \n\t"
+        "psrld          $5, %%mm0   \n\t"
+        "pslld         $11, %%mm3   \n\t"
+        "por         %%mm3, %%mm0   \n\t"
+        MOVNTQ"      %%mm0, (%0)    \n\t"
+        "add           $16,  %1     \n\t" /* 16 source bytes consumed ... */
+        "add            $8,  %0     \n\t" /* ... 8 destination bytes produced */
+        "2:                         \n\t"
+        "cmp            %2,  %1     \n\t"
+        " jb            1b          \n\t"
+        : "+r" (d), "+r"(s)
+        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
+    );
+#else
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory"); /* alternative shift-and-mask version; not compiled because of the '#if 1' above */
+    __asm__ volatile(
+        "movq    %0, %%mm7    \n\t"
+        "movq    %1, %%mm6    \n\t"
+        ::"m"(red_16mask),"m"(green_16mask));
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t"
+            "movd         4%1, %%mm3    \n\t"
+            "punpckldq    8%1, %%mm0    \n\t"
+            "punpckldq   12%1, %%mm3    \n\t"
+            "movq       %%mm0, %%mm1    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm3, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "psrlq         $3, %%mm0    \n\t"
+            "psrlq         $3, %%mm3    \n\t"
+            "pand          %2, %%mm0    \n\t"
+            "pand          %2, %%mm3    \n\t"
+            "psrlq         $5, %%mm1    \n\t"
+            "psrlq         $5, %%mm4    \n\t"
+            "pand       %%mm6, %%mm1    \n\t"
+            "pand       %%mm6, %%mm4    \n\t"
+            "psrlq         $8, %%mm2    \n\t"
+            "psrlq         $8, %%mm5    \n\t"
+            "pand       %%mm7, %%mm2    \n\t"
+            "pand       %%mm7, %%mm5    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            "psllq        $16, %%mm3    \n\t"
+            "por        %%mm3, %%mm0    \n\t"
+            MOVNTQ"     %%mm0, %0       \n\t"
+            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+        d += 4;
+        s += 16;
+    }
+#endif
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one 32-bit pixel per iteration */
+        register int rgb = *(const uint32_t*)s; s += 4;
+        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); /* top 5 bits of byte 0, top 6 of byte 1, top 5 of byte 2 */
+    }
+}
+
+static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 4-byte source pixel into one 16-bit 5-6-5 word; byte 0 lands in the top five bits. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+    const uint8_t *mm_end;
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile( /* mm7 and mm6 keep two of the field masks for the whole MMX loop below */
+        "movq          %0, %%mm7    \n\t"
+        "movq          %1, %%mm6    \n\t"
+        ::"m"(red_16mask),"m"(green_16mask));
+    mm_end = end - 15; /* one MMX pass consumes 16 bytes (4 pixels) */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t" /* mm0 low dword  = pixel 0 */
+            "movd         4%1, %%mm3    \n\t" /* mm3 low dword  = pixel 1 */
+            "punpckldq    8%1, %%mm0    \n\t" /* mm0 high dword = pixel 2 */
+            "punpckldq   12%1, %%mm3    \n\t" /* mm3 high dword = pixel 3 */
+            "movq       %%mm0, %%mm1    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm3, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "psllq         $8, %%mm0    \n\t" /* same shift as (rgb&0xF8)<<8 in the scalar tail */
+            "psllq         $8, %%mm3    \n\t"
+            "pand       %%mm7, %%mm0    \n\t" /* red_16mask */
+            "pand       %%mm7, %%mm3    \n\t"
+            "psrlq         $5, %%mm1    \n\t" /* same shift as (rgb&0xFC00)>>5 */
+            "psrlq         $5, %%mm4    \n\t"
+            "pand       %%mm6, %%mm1    \n\t" /* green_16mask */
+            "pand       %%mm6, %%mm4    \n\t"
+            "psrlq        $19, %%mm2    \n\t" /* same shift as (rgb&0xF80000)>>19 */
+            "psrlq        $19, %%mm5    \n\t"
+            "pand          %2, %%mm2    \n\t" /* blue_16mask */
+            "pand          %2, %%mm5    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            "psllq        $16, %%mm3    \n\t" /* move pixels 1 and 3 up by one 16-bit word */
+            "por        %%mm3, %%mm0    \n\t" /* merge with pixels 0 and 2 */
+            MOVNTQ"     %%mm0, %0       \n\t" /* write the result to *d (MOVNTQ is defined outside this chunk) */
+            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+        d += 4;
+        s += 16;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one 4-byte pixel per pass */
+        register int rgb = *(const uint32_t*)s; s += 4;
+        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
+    }
+}
+
+static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 4-byte source pixel into one 16-bit 5-5-5 word; byte 0 lands in the low five bits. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+    const uint8_t *mm_end;
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+    mm_end = end - 15; /* one MMX pass consumes 16 bytes (4 pixels) */
+#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
+    __asm__ volatile(
+        "movq           %3, %%mm5   \n\t" /* mm5 = mask3215g */
+        "movq           %4, %%mm6   \n\t" /* mm6 = mask3216br */
+        "movq           %5, %%mm7   \n\t" /* mm7 = mul3215 */
+        "jmp            2f          \n\t" /* test the loop condition before the first pass */
+        ".p2align        4          \n\t"
+        "1:                         \n\t"
+        PREFETCH"   32(%1)          \n\t"
+        "movd         (%1), %%mm0   \n\t" /* mm0 low dword  = pixel 0 */
+        "movd        4(%1), %%mm3   \n\t" /* mm3 low dword  = pixel 1 */
+        "punpckldq   8(%1), %%mm0   \n\t" /* mm0 high dword = pixel 2 */
+        "punpckldq  12(%1), %%mm3   \n\t" /* mm3 high dword = pixel 3 */
+        "movq        %%mm0, %%mm1   \n\t"
+        "movq        %%mm3, %%mm4   \n\t"
+        "pand        %%mm6, %%mm0   \n\t"
+        "pand        %%mm6, %%mm3   \n\t"
+        "pmaddwd     %%mm7, %%mm0   \n\t" /* multiply words by mm7 and add each adjacent pair into a dword */
+        "pmaddwd     %%mm7, %%mm3   \n\t"
+        "pand        %%mm5, %%mm1   \n\t"
+        "pand        %%mm5, %%mm4   \n\t"
+        "por         %%mm1, %%mm0   \n\t"
+        "por         %%mm4, %%mm3   \n\t"
+        "psrld          $6, %%mm0   \n\t" /* presumably aligns pixels 0/2 in the low words (depends on the constants) */
+        "pslld         $10, %%mm3   \n\t" /* presumably aligns pixels 1/3 in the high words */
+        "por         %%mm3, %%mm0   \n\t"
+        MOVNTQ"      %%mm0, (%0)    \n\t"
+        "add           $16,  %1     \n\t" /* s += 16 bytes */
+        "add            $8,  %0     \n\t" /* d += 8 bytes  */
+        "2:                         \n\t"
+        "cmp            %2,  %1     \n\t"
+        " jb            1b          \n\t" /* loop while s < mm_end (unsigned compare) */
+        : "+r" (d), "+r"(s)
+        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
+        : "memory"); /* the asm reads *s and writes *d only through the register pointers */
+#else /* not compiled while the condition above is 1 */
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq          %0, %%mm7    \n\t"
+        "movq          %1, %%mm6    \n\t"
+        ::"m"(red_15mask),"m"(green_15mask));
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t"
+            "movd         4%1, %%mm3    \n\t"
+            "punpckldq    8%1, %%mm0    \n\t"
+            "punpckldq   12%1, %%mm3    \n\t"
+            "movq       %%mm0, %%mm1    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm3, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "psrlq         $3, %%mm0    \n\t" /* same shift as (rgb&0xFF)>>3 in the scalar tail */
+            "psrlq         $3, %%mm3    \n\t"
+            "pand          %2, %%mm0    \n\t"
+            "pand          %2, %%mm3    \n\t"
+            "psrlq         $6, %%mm1    \n\t" /* same shift as (rgb&0xF800)>>6 */
+            "psrlq         $6, %%mm4    \n\t"
+            "pand       %%mm6, %%mm1    \n\t"
+            "pand       %%mm6, %%mm4    \n\t"
+            "psrlq         $9, %%mm2    \n\t" /* same shift as (rgb&0xF80000)>>9 */
+            "psrlq         $9, %%mm5    \n\t"
+            "pand       %%mm7, %%mm2    \n\t"
+            "pand       %%mm7, %%mm5    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            "psllq        $16, %%mm3    \n\t"
+            "por        %%mm3, %%mm0    \n\t"
+            MOVNTQ"     %%mm0, %0       \n\t"
+            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+        d += 4;
+        s += 16;
+    }
+#endif
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one 4-byte pixel per pass */
+        register int rgb = *(const uint32_t*)s; s += 4;
+        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
+    }
+}
+
+static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 4-byte source pixel into one 16-bit 5-5-5 word; byte 0 lands in bits 10-14. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+    const uint8_t *mm_end;
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile( /* mm7 and mm6 keep two of the field masks for the whole MMX loop below */
+        "movq          %0, %%mm7    \n\t"
+        "movq          %1, %%mm6    \n\t"
+        ::"m"(red_15mask),"m"(green_15mask));
+    mm_end = end - 15; /* one MMX pass consumes 16 bytes (4 pixels) */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t" /* mm0 low dword  = pixel 0 */
+            "movd         4%1, %%mm3    \n\t" /* mm3 low dword  = pixel 1 */
+            "punpckldq    8%1, %%mm0    \n\t" /* mm0 high dword = pixel 2 */
+            "punpckldq   12%1, %%mm3    \n\t" /* mm3 high dword = pixel 3 */
+            "movq       %%mm0, %%mm1    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm3, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "psllq         $7, %%mm0    \n\t" /* same shift as (rgb&0xF8)<<7 in the scalar tail */
+            "psllq         $7, %%mm3    \n\t"
+            "pand       %%mm7, %%mm0    \n\t" /* red_15mask */
+            "pand       %%mm7, %%mm3    \n\t"
+            "psrlq         $6, %%mm1    \n\t" /* same shift as (rgb&0xF800)>>6 */
+            "psrlq         $6, %%mm4    \n\t"
+            "pand       %%mm6, %%mm1    \n\t" /* green_15mask */
+            "pand       %%mm6, %%mm4    \n\t"
+            "psrlq        $19, %%mm2    \n\t" /* same shift as (rgb&0xF80000)>>19 */
+            "psrlq        $19, %%mm5    \n\t"
+            "pand          %2, %%mm2    \n\t" /* blue_15mask */
+            "pand          %2, %%mm5    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            "psllq        $16, %%mm3    \n\t" /* move pixels 1 and 3 up by one 16-bit word */
+            "por        %%mm3, %%mm0    \n\t" /* merge with pixels 0 and 2 */
+            MOVNTQ"     %%mm0, %0       \n\t" /* write the result to *d (MOVNTQ is defined outside this chunk) */
+            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+        d += 4;
+        s += 16;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one 4-byte pixel per pass */
+        register int rgb = *(const uint32_t*)s; s += 4;
+        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
+    }
+}
+
+static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 3-byte source pixel into one 16-bit 5-6-5 word; byte 0 lands in the low five bits. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+    const uint8_t *mm_end;
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile( /* mm7 and mm6 keep two of the field masks for the whole MMX loop below */
+        "movq         %0, %%mm7     \n\t"
+        "movq         %1, %%mm6     \n\t"
+        ::"m"(red_16mask),"m"(green_16mask));
+    mm_end = end - 12; /* a pass reads 13 bytes (the dword at offset 9), so keep at least 13 readable */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t" /* mm0 low dword  = pixel 0 plus one byte of pixel 1 */
+            "movd         3%1, %%mm3    \n\t" /* mm3 low dword  = pixel 1 plus one extra byte */
+            "punpckldq    6%1, %%mm0    \n\t" /* mm0 high dword = pixel 2 plus one extra byte */
+            "punpckldq    9%1, %%mm3    \n\t" /* mm3 high dword = pixel 3 plus the byte at offset 12 */
+            "movq       %%mm0, %%mm1    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm3, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "psrlq         $3, %%mm0    \n\t" /* same shift as b>>3 in the scalar tail */
+            "psrlq         $3, %%mm3    \n\t"
+            "pand          %2, %%mm0    \n\t" /* blue_16mask */
+            "pand          %2, %%mm3    \n\t"
+            "psrlq         $5, %%mm1    \n\t" /* byte 1 sits at bit 8, so >>5 equals (g&0xFC)<<3 */
+            "psrlq         $5, %%mm4    \n\t"
+            "pand       %%mm6, %%mm1    \n\t" /* green_16mask */
+            "pand       %%mm6, %%mm4    \n\t"
+            "psrlq         $8, %%mm2    \n\t" /* byte 2 sits at bit 16, so >>8 equals (r&0xF8)<<8 */
+            "psrlq         $8, %%mm5    \n\t"
+            "pand       %%mm7, %%mm2    \n\t" /* red_16mask */
+            "pand       %%mm7, %%mm5    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            "psllq        $16, %%mm3    \n\t" /* move pixels 1 and 3 up by one 16-bit word */
+            "por        %%mm3, %%mm0    \n\t" /* merge with pixels 0 and 2 */
+            MOVNTQ"     %%mm0, %0       \n\t" /* write the result to *d (MOVNTQ is defined outside this chunk) */
+            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+        d += 4;
+        s += 12;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one 3-byte pixel per pass */
+        const int b = *s++;
+        const int g = *s++;
+        const int r = *s++;
+        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
+    }
+}
+
+static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 3-byte source pixel into one 16-bit 5-6-5 word; byte 0 lands in the top five bits. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+    const uint8_t *mm_end;
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile( /* mm7 and mm6 keep two of the field masks for the whole MMX loop below */
+        "movq         %0, %%mm7     \n\t"
+        "movq         %1, %%mm6     \n\t"
+        ::"m"(red_16mask),"m"(green_16mask));
+    mm_end = end - 15; /* needs 16 bytes left although a pass reads 13 and advances by 12 */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t" /* mm0 low dword  = pixel 0 plus one byte of pixel 1 */
+            "movd         3%1, %%mm3    \n\t" /* mm3 low dword  = pixel 1 plus one extra byte */
+            "punpckldq    6%1, %%mm0    \n\t" /* mm0 high dword = pixel 2 plus one extra byte */
+            "punpckldq    9%1, %%mm3    \n\t" /* mm3 high dword = pixel 3 plus the byte at offset 12 */
+            "movq       %%mm0, %%mm1    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm3, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "psllq         $8, %%mm0    \n\t" /* same shift as (r&0xF8)<<8 in the scalar tail */
+            "psllq         $8, %%mm3    \n\t"
+            "pand       %%mm7, %%mm0    \n\t" /* red_16mask */
+            "pand       %%mm7, %%mm3    \n\t"
+            "psrlq         $5, %%mm1    \n\t" /* byte 1 sits at bit 8, so >>5 equals (g&0xFC)<<3 */
+            "psrlq         $5, %%mm4    \n\t"
+            "pand       %%mm6, %%mm1    \n\t" /* green_16mask */
+            "pand       %%mm6, %%mm4    \n\t"
+            "psrlq        $19, %%mm2    \n\t" /* byte 2 sits at bit 16, so >>19 equals b>>3 */
+            "psrlq        $19, %%mm5    \n\t"
+            "pand          %2, %%mm2    \n\t" /* blue_16mask */
+            "pand          %2, %%mm5    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            "psllq        $16, %%mm3    \n\t" /* move pixels 1 and 3 up by one 16-bit word */
+            "por        %%mm3, %%mm0    \n\t" /* merge with pixels 0 and 2 */
+            MOVNTQ"     %%mm0, %0       \n\t" /* write the result to *d (MOVNTQ is defined outside this chunk) */
+            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+        d += 4;
+        s += 12;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one 3-byte pixel per pass */
+        const int r = *s++;
+        const int g = *s++;
+        const int b = *s++;
+        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
+    }
+}
+
+static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 3-byte source pixel into one 16-bit 5-5-5 word; byte 0 lands in the low five bits. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+    const uint8_t *mm_end;
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile( /* mm7 and mm6 keep two of the field masks for the whole MMX loop below */
+        "movq          %0, %%mm7    \n\t"
+        "movq          %1, %%mm6    \n\t"
+        ::"m"(red_15mask),"m"(green_15mask));
+    mm_end = end - 12; /* a pass reads 13 bytes (the dword at offset 9), so keep at least 13 readable */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t" /* mm0 low dword  = pixel 0 plus one byte of pixel 1 */
+            "movd         3%1, %%mm3    \n\t" /* mm3 low dword  = pixel 1 plus one extra byte */
+            "punpckldq    6%1, %%mm0    \n\t" /* mm0 high dword = pixel 2 plus one extra byte */
+            "punpckldq    9%1, %%mm3    \n\t" /* mm3 high dword = pixel 3 plus the byte at offset 12 */
+            "movq       %%mm0, %%mm1    \n\t"
+            "movq       %%mm0, %%mm2    \n\t"
+            "movq       %%mm3, %%mm4    \n\t"
+            "movq       %%mm3, %%mm5    \n\t"
+            "psrlq         $3, %%mm0    \n\t" /* same shift as b>>3 in the scalar tail */
+            "psrlq         $3, %%mm3    \n\t"
+            "pand          %2, %%mm0    \n\t" /* blue_15mask */
+            "pand          %2, %%mm3    \n\t"
+            "psrlq         $6, %%mm1    \n\t" /* byte 1 sits at bit 8, so >>6 equals (g&0xF8)<<2 */
+            "psrlq         $6, %%mm4    \n\t"
+            "pand       %%mm6, %%mm1    \n\t" /* green_15mask */
+            "pand       %%mm6, %%mm4    \n\t"
+            "psrlq         $9, %%mm2    \n\t" /* byte 2 sits at bit 16, so >>9 equals (r&0xF8)<<7 */
+            "psrlq         $9, %%mm5    \n\t"
+            "pand       %%mm7, %%mm2    \n\t" /* red_15mask */
+            "pand       %%mm7, %%mm5    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            "psllq        $16, %%mm3    \n\t" /* move pixels 1 and 3 up by one 16-bit word */
+            "por        %%mm3, %%mm0    \n\t" /* merge with pixels 0 and 2 */
+            MOVNTQ"     %%mm0, %0       \n\t" /* write the result to *d (MOVNTQ is defined outside this chunk) */
+            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+        d += 4;
+        s += 12;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one 3-byte pixel per pass */
+        const int b = *s++;
+        const int g = *s++;
+        const int r = *s++;
+        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
+    }
+}
+
+static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 3-byte source pixel into one 16-bit 5-5-5 word; byte 0 lands in bits 10-14. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+    const uint8_t *mm_end;
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size;
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile( /* mm7 and mm6 keep two of the field masks for the whole MMX loop below */
+        "movq         %0, %%mm7     \n\t"
+        "movq         %1, %%mm6     \n\t"
+        ::"m"(red_15mask),"m"(green_15mask));
+    mm_end = end - 15; /* needs 16 bytes left although a pass reads 13 and advances by 12 */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"   32%1            \n\t"
+            "movd         %1, %%mm0     \n\t" /* mm0 low dword  = pixel 0 plus one byte of pixel 1 */
+            "movd        3%1, %%mm3     \n\t" /* mm3 low dword  = pixel 1 plus one extra byte */
+            "punpckldq   6%1, %%mm0     \n\t" /* mm0 high dword = pixel 2 plus one extra byte */
+            "punpckldq   9%1, %%mm3     \n\t" /* mm3 high dword = pixel 3 plus the byte at offset 12 */
+            "movq      %%mm0, %%mm1     \n\t"
+            "movq      %%mm0, %%mm2     \n\t"
+            "movq      %%mm3, %%mm4     \n\t"
+            "movq      %%mm3, %%mm5     \n\t"
+            "psllq        $7, %%mm0     \n\t" /* same shift as (r&0xF8)<<7 in the scalar tail */
+            "psllq        $7, %%mm3     \n\t"
+            "pand      %%mm7, %%mm0     \n\t" /* red_15mask */
+            "pand      %%mm7, %%mm3     \n\t"
+            "psrlq        $6, %%mm1     \n\t" /* byte 1 sits at bit 8, so >>6 equals (g&0xF8)<<2 */
+            "psrlq        $6, %%mm4     \n\t"
+            "pand      %%mm6, %%mm1     \n\t" /* green_15mask */
+            "pand      %%mm6, %%mm4     \n\t"
+            "psrlq       $19, %%mm2     \n\t" /* byte 2 sits at bit 16, so >>19 equals b>>3 */
+            "psrlq       $19, %%mm5     \n\t"
+            "pand         %2, %%mm2     \n\t" /* blue_15mask */
+            "pand         %2, %%mm5     \n\t"
+            "por       %%mm1, %%mm0     \n\t"
+            "por       %%mm4, %%mm3     \n\t"
+            "por       %%mm2, %%mm0     \n\t"
+            "por       %%mm5, %%mm3     \n\t"
+            "psllq       $16, %%mm3     \n\t" /* move pixels 1 and 3 up by one 16-bit word */
+            "por       %%mm3, %%mm0     \n\t" /* merge with pixels 0 and 2 */
+            MOVNTQ"    %%mm0, %0        \n\t" /* write the result to *d (MOVNTQ is defined outside this chunk) */
+            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+        d += 4;
+        s += 12;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one 3-byte pixel per pass */
+        const int r = *s++;
+        const int g = *s++;
+        const int b = *s++;
+        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
+    }
+}
+
+/*
+  I use a less accurate approximation here by simply left-shifting the input
+  value and filling the low order bits with zeroes. This method improves PNG
+  compression but this scheme cannot reproduce white exactly, since it does
+  not generate an all-ones maximum value; the net effect is to darken the
+  image slightly.
+
+  The better method should be "left bit replication":
+
+   4 3 2 1 0
+   ---------
+   1 1 0 1 1
+
+   7 6 5 4 3  2 1 0
+   ----------------
+   1 1 0 1 1  1 1 0
+   |=======|  |===|
+       |      leftmost bits repeated to fill open bits
+       |
+   original bits
+*/
+static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Expands every 16-bit 5-5-5 source word to three bytes; bits 0-4 become byte 0, low bits are zero-filled. */
+    const uint16_t *end;
+    const uint16_t *mm_end;
+    uint8_t *d = dst;
+    const uint16_t *s = (const uint16_t*)src;
+    end = s + src_size/2; /* src_size is in bytes, s counts 16-bit words */
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    mm_end = end - 7; /* one MMX pass consumes 8 source words */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movq          %1, %%mm0    \n\t" /* three copies of source words 0-3 */
+            "movq          %1, %%mm1    \n\t"
+            "movq          %1, %%mm2    \n\t"
+            "pand          %2, %%mm0    \n\t" /* mask15b */
+            "pand          %3, %%mm1    \n\t" /* mask15g */
+            "pand          %4, %%mm2    \n\t" /* mask15r */
+            "psllq         $3, %%mm0    \n\t" /* same shifts as the scalar tail: <<3, >>2, >>7 */
+            "psrlq         $2, %%mm1    \n\t"
+            "psrlq         $7, %%mm2    \n\t"
+            "movq       %%mm0, %%mm3    \n\t"
+            "movq       %%mm1, %%mm4    \n\t"
+            "movq       %%mm2, %%mm5    \n\t"
+            "punpcklwd     %5, %%mm0    \n\t" /* widen words 0-1 to dwords, interleaving with mmx_null */
+            "punpcklwd     %5, %%mm1    \n\t"
+            "punpcklwd     %5, %%mm2    \n\t"
+            "punpckhwd     %5, %%mm3    \n\t" /* widen words 2-3 to dwords */
+            "punpckhwd     %5, %%mm4    \n\t"
+            "punpckhwd     %5, %%mm5    \n\t"
+            "psllq         $8, %%mm1    \n\t" /* second component goes to byte 1 of each dword */
+            "psllq        $16, %%mm2    \n\t" /* third component goes to byte 2 of each dword */
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "psllq         $8, %%mm4    \n\t"
+            "psllq        $16, %%mm5    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+
+            "movq       %%mm0, %%mm6    \n\t" /* park pixels 0-1 in mm6 */
+            "movq       %%mm3, %%mm7    \n\t" /* park pixels 2-3 in mm7 */
+
+            "movq         8%1, %%mm0    \n\t" /* repeat the same steps for source words 4-7 */
+            "movq         8%1, %%mm1    \n\t"
+            "movq         8%1, %%mm2    \n\t"
+            "pand          %2, %%mm0    \n\t"
+            "pand          %3, %%mm1    \n\t"
+            "pand          %4, %%mm2    \n\t"
+            "psllq         $3, %%mm0    \n\t"
+            "psrlq         $2, %%mm1    \n\t"
+            "psrlq         $7, %%mm2    \n\t"
+            "movq       %%mm0, %%mm3    \n\t"
+            "movq       %%mm1, %%mm4    \n\t"
+            "movq       %%mm2, %%mm5    \n\t"
+            "punpcklwd     %5, %%mm0    \n\t"
+            "punpcklwd     %5, %%mm1    \n\t"
+            "punpcklwd     %5, %%mm2    \n\t"
+            "punpckhwd     %5, %%mm3    \n\t"
+            "punpckhwd     %5, %%mm4    \n\t"
+            "punpckhwd     %5, %%mm5    \n\t"
+            "psllq         $8, %%mm1    \n\t"
+            "psllq        $16, %%mm2    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "psllq         $8, %%mm4    \n\t"
+            "psllq        $16, %%mm5    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+
+            :"=m"(*d)
+            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
+            :"memory");
+        /* borrowed 32 to 24 */
+        __asm__ volatile( /* relies on mm0, mm3, mm6 and mm7 left by the asm statement above */
+            "movq       %%mm0, %%mm4    \n\t" /* mm4 = pixels 4-5 */
+            "movq       %%mm3, %%mm5    \n\t" /* mm5 = pixels 6-7 */
+            "movq       %%mm6, %%mm0    \n\t" /* mm0 = pixels 0-1 */
+            "movq       %%mm7, %%mm1    \n\t" /* mm1 = pixels 2-3 */
+
+            "movq       %%mm4, %%mm6    \n\t" /* mm6 duplicates mm4 */
+            "movq       %%mm5, %%mm7    \n\t" /* mm7 duplicates mm5 */
+            "movq       %%mm0, %%mm2    \n\t" /* mm2 duplicates mm0 */
+            "movq       %%mm1, %%mm3    \n\t" /* mm3 duplicates mm1 */
+
+            STORE_BGR24_MMX
+
+            :"=m"(*d)
+            :"m"(*s)
+            :"memory");
+        d += 24;
+        s += 8;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one source word, three output bytes per pass */
+        register uint16_t bgr;
+        bgr = *s++;
+        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr&0x3E0)>>2;
+        *d++ = (bgr&0x7C00)>>7;
+    }
+}
+
+static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Expands every 16-bit 5-6-5 source word to three bytes; bits 0-4 become byte 0, low bits are zero-filled. */
+    const uint16_t *end;
+    const uint16_t *mm_end;
+    uint8_t *d = (uint8_t *)dst;
+    const uint16_t *s = (const uint16_t *)src;
+    end = s + src_size/2; /* src_size is in bytes, s counts 16-bit words */
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    mm_end = end - 7; /* one MMX pass consumes 8 source words */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movq          %1, %%mm0    \n\t" /* three copies of source words 0-3 */
+            "movq          %1, %%mm1    \n\t"
+            "movq          %1, %%mm2    \n\t"
+            "pand          %2, %%mm0    \n\t" /* mask16b */
+            "pand          %3, %%mm1    \n\t" /* mask16g */
+            "pand          %4, %%mm2    \n\t" /* mask16r */
+            "psllq         $3, %%mm0    \n\t" /* same shifts as the scalar tail: <<3, >>3, >>8 */
+            "psrlq         $3, %%mm1    \n\t"
+            "psrlq         $8, %%mm2    \n\t"
+            "movq       %%mm0, %%mm3    \n\t"
+            "movq       %%mm1, %%mm4    \n\t"
+            "movq       %%mm2, %%mm5    \n\t"
+            "punpcklwd     %5, %%mm0    \n\t" /* widen words 0-1 to dwords, interleaving with mmx_null */
+            "punpcklwd     %5, %%mm1    \n\t"
+            "punpcklwd     %5, %%mm2    \n\t"
+            "punpckhwd     %5, %%mm3    \n\t" /* widen words 2-3 to dwords */
+            "punpckhwd     %5, %%mm4    \n\t"
+            "punpckhwd     %5, %%mm5    \n\t"
+            "psllq         $8, %%mm1    \n\t" /* second component goes to byte 1 of each dword */
+            "psllq        $16, %%mm2    \n\t" /* third component goes to byte 2 of each dword */
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "psllq         $8, %%mm4    \n\t"
+            "psllq        $16, %%mm5    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+
+            "movq       %%mm0, %%mm6    \n\t" /* park pixels 0-1 in mm6 */
+            "movq       %%mm3, %%mm7    \n\t" /* park pixels 2-3 in mm7 */
+
+            "movq         8%1, %%mm0    \n\t" /* repeat the same steps for source words 4-7 */
+            "movq         8%1, %%mm1    \n\t"
+            "movq         8%1, %%mm2    \n\t"
+            "pand          %2, %%mm0    \n\t"
+            "pand          %3, %%mm1    \n\t"
+            "pand          %4, %%mm2    \n\t"
+            "psllq         $3, %%mm0    \n\t"
+            "psrlq         $3, %%mm1    \n\t"
+            "psrlq         $8, %%mm2    \n\t"
+            "movq       %%mm0, %%mm3    \n\t"
+            "movq       %%mm1, %%mm4    \n\t"
+            "movq       %%mm2, %%mm5    \n\t"
+            "punpcklwd     %5, %%mm0    \n\t"
+            "punpcklwd     %5, %%mm1    \n\t"
+            "punpcklwd     %5, %%mm2    \n\t"
+            "punpckhwd     %5, %%mm3    \n\t"
+            "punpckhwd     %5, %%mm4    \n\t"
+            "punpckhwd     %5, %%mm5    \n\t"
+            "psllq         $8, %%mm1    \n\t"
+            "psllq        $16, %%mm2    \n\t"
+            "por        %%mm1, %%mm0    \n\t"
+            "por        %%mm2, %%mm0    \n\t"
+            "psllq         $8, %%mm4    \n\t"
+            "psllq        $16, %%mm5    \n\t"
+            "por        %%mm4, %%mm3    \n\t"
+            "por        %%mm5, %%mm3    \n\t"
+            :"=m"(*d)
+            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+            :"memory");
+        /* borrowed 32 to 24 */
+        __asm__ volatile( /* relies on mm0, mm3, mm6 and mm7 left by the asm statement above */
+            "movq       %%mm0, %%mm4    \n\t" /* mm4 = pixels 4-5 */
+            "movq       %%mm3, %%mm5    \n\t" /* mm5 = pixels 6-7 */
+            "movq       %%mm6, %%mm0    \n\t" /* mm0 = pixels 0-1 */
+            "movq       %%mm7, %%mm1    \n\t" /* mm1 = pixels 2-3 */
+
+            "movq       %%mm4, %%mm6    \n\t" /* mm6 duplicates mm4 */
+            "movq       %%mm5, %%mm7    \n\t" /* mm7 duplicates mm5 */
+            "movq       %%mm0, %%mm2    \n\t" /* mm2 duplicates mm0 */
+            "movq       %%mm1, %%mm3    \n\t" /* mm3 duplicates mm1 */
+
+            STORE_BGR24_MMX
+
+            :"=m"(*d)
+            :"m"(*s)
+            :"memory");
+        d += 24;
+        s += 8;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one source word, three output bytes per pass */
+        register uint16_t bgr;
+        bgr = *s++;
+        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr&0x7E0)>>3;
+        *d++ = (bgr&0xF800)>>8;
+    }
+}
+
+/* Register contents PACK_RGB32 expects on entry (bytes listed from most to least significant):
+ * mm0 = 00 B3 00 B2 00 B1 00 B0
+ * mm1 = 00 G3 00 G2 00 G1 00 G0
+ * mm2 = 00 R3 00 R2 00 R1 00 R0
+ * mm6 = FF FF FF FF FF FF FF FF
+ * mm7 = 00 00 00 00 00 00 00 00
+ */
+#define PACK_RGB32 \
+    "packuswb   %%mm7, %%mm0    \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
+    "packuswb   %%mm7, %%mm1    \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
+    "packuswb   %%mm7, %%mm2    \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
+    "punpcklbw  %%mm1, %%mm0    \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
+    "punpcklbw  %%mm6, %%mm2    \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
+    "movq       %%mm0, %%mm3    \n\t" /* keep a copy for pixels 2-3 */ \
+    "punpcklwd  %%mm2, %%mm0    \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
+    "punpckhwd  %%mm2, %%mm3    \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
+    MOVNTQ"     %%mm0,  %0      \n\t" /* pixels 0-1 to operand %0 */ \
+    MOVNTQ"     %%mm3, 8%0      \n\t" /* pixels 2-3, 8 bytes on   */ \
+
+static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Expands every 16-bit 5-5-5 source word to four bytes: three zero-filled components, then 255. */
+    const uint16_t *end;
+    const uint16_t *mm_end;
+    uint8_t *d = dst;
+    const uint16_t *s = (const uint16_t *)src;
+    end = s + src_size/2; /* src_size is in bytes, s counts 16-bit words */
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory"); /* mm7 = all zeros, used by PACK_RGB32 */
+    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t":::"memory"); /* mm6 = all ones, used by PACK_RGB32 */
+    mm_end = end - 3; /* one MMX pass consumes 4 source words */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movq          %1, %%mm0    \n\t" /* three copies of source words 0-3 */
+            "movq          %1, %%mm1    \n\t"
+            "movq          %1, %%mm2    \n\t"
+            "pand          %2, %%mm0    \n\t" /* mask15b */
+            "pand          %3, %%mm1    \n\t" /* mask15g */
+            "pand          %4, %%mm2    \n\t" /* mask15r */
+            "psllq         $3, %%mm0    \n\t" /* same shifts as the scalar tail: <<3, >>2, >>7 */
+            "psrlq         $2, %%mm1    \n\t"
+            "psrlq         $7, %%mm2    \n\t"
+            PACK_RGB32
+            :"=m"(*d)
+            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
+            :"memory");
+        d += 16;
+        s += 4;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one source word, four output bytes per pass */
+        register uint16_t bgr;
+        bgr = *s++;
+        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr&0x3E0)>>2;
+        *d++ = (bgr&0x7C00)>>7;
+        *d++ = 255;
+    }
+}
+
+static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Expands every 16-bit 5-6-5 source word to four bytes: three zero-filled components, then 255. */
+    const uint16_t *end;
+    const uint16_t *mm_end;
+    uint8_t *d = dst;
+    const uint16_t *s = (const uint16_t*)src;
+    end = s + src_size/2; /* src_size is in bytes, s counts 16-bit words */
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory"); /* mm7 = all zeros, used by PACK_RGB32 */
+    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t":::"memory"); /* mm6 = all ones, used by PACK_RGB32 */
+    mm_end = end - 3; /* one MMX pass consumes 4 source words */
+    while (s < mm_end) {
+        __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movq          %1, %%mm0    \n\t" /* three copies of source words 0-3 */
+            "movq          %1, %%mm1    \n\t"
+            "movq          %1, %%mm2    \n\t"
+            "pand          %2, %%mm0    \n\t" /* mask16b */
+            "pand          %3, %%mm1    \n\t" /* mask16g */
+            "pand          %4, %%mm2    \n\t" /* mask16r */
+            "psllq         $3, %%mm0    \n\t" /* same shifts as the scalar tail: <<3, >>3, >>8 */
+            "psrlq         $3, %%mm1    \n\t"
+            "psrlq         $8, %%mm2    \n\t"
+            PACK_RGB32
+            :"=m"(*d)
+            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
+            :"memory");
+        d += 16;
+        s += 4;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    while (s < end) { /* scalar tail: one source word, four output bytes per pass */
+        register uint16_t bgr;
+        bgr = *s++;
+        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr&0x7E0)>>3;
+        *d++ = (bgr&0xF800)>>8;
+        *d++ = 255;
+    }
+}
+
+static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Copies 4-byte groups from src to dst with bytes 0 and 2 exchanged; bytes 1 and 3 stay in place. */
+    x86_reg idx = 15 - src_size; /* negative while at least 16 bytes remain */
+    const uint8_t *s = src-idx; /* biased so that s[idx] is src[0] */
+    uint8_t *d = dst-idx; /* biased the same way for dst */
+    __asm__ volatile(
+        "test          %0, %0           \n\t"
+        "jns           2f               \n\t" /* fewer than 16 bytes: skip the MMX loop entirely */
+        PREFETCH"       (%1, %0)        \n\t"
+        "movq          %3, %%mm7        \n\t"
+        "pxor          %4, %%mm7        \n\t" /* mm7 = mask32b ^ mask32r */
+        "movq       %%mm7, %%mm6        \n\t" /* mm6 masks the bytes that get exchanged */
+        "pxor          %5, %%mm7        \n\t" /* mm7 ^= mmx_one; it masks the bytes left in place */
+        ".p2align       4               \n\t"
+        "1:                             \n\t"
+        PREFETCH"     32(%1, %0)        \n\t"
+        "movq           (%1, %0), %%mm0 \n\t" /* 16 source bytes per pass */
+        "movq          8(%1, %0), %%mm1 \n\t"
+# if COMPILE_TEMPLATE_MMX2
+        "pshufw      $177, %%mm0, %%mm3 \n\t" /* 177 = 0xB1: swap the two words inside each dword */
+        "pshufw      $177, %%mm1, %%mm5 \n\t"
+        "pand       %%mm7, %%mm0        \n\t"
+        "pand       %%mm6, %%mm3        \n\t"
+        "pand       %%mm7, %%mm1        \n\t"
+        "pand       %%mm6, %%mm5        \n\t"
+        "por        %%mm3, %%mm0        \n\t"
+        "por        %%mm5, %%mm1        \n\t"
+# else
+        "movq       %%mm0, %%mm2        \n\t"
+        "movq       %%mm1, %%mm4        \n\t"
+        "pand       %%mm7, %%mm0        \n\t"
+        "pand       %%mm6, %%mm2        \n\t"
+        "pand       %%mm7, %%mm1        \n\t"
+        "pand       %%mm6, %%mm4        \n\t"
+        "movq       %%mm2, %%mm3        \n\t"
+        "movq       %%mm4, %%mm5        \n\t"
+        "pslld        $16, %%mm2        \n\t" /* shift the exchanged bytes both ways within each dword */
+        "psrld        $16, %%mm3        \n\t"
+        "pslld        $16, %%mm4        \n\t"
+        "psrld        $16, %%mm5        \n\t"
+        "por        %%mm2, %%mm0        \n\t"
+        "por        %%mm4, %%mm1        \n\t"
+        "por        %%mm3, %%mm0        \n\t"
+        "por        %%mm5, %%mm1        \n\t"
+# endif
+        MOVNTQ"     %%mm0,  (%2, %0)    \n\t"
+        MOVNTQ"     %%mm1, 8(%2, %0)    \n\t"
+        "add          $16, %0           \n\t"
+        "js            1b               \n\t" /* loop while idx is still negative */
+        SFENCE"                         \n\t"
+        EMMS"                           \n\t"
+        "2:                             \n\t"
+        : "+&r"(idx)
+        : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
+        : "memory");
+    for (; idx<15; idx+=4) { /* scalar tail: one 4-byte group per pass */
+        register unsigned v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; /* unsigned: v<<16 would overflow a signed int */
+        v &= 0xff00ff;
+        *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
+    }
+}
+
+static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+{ // Copies packed 3-byte pixels from src to dst with bytes 0 and 2 of every pixel exchanged.
+    unsigned i;
+    x86_reg mmx_size= 23 - src_size; // negative as long as at least 24 bytes remain
+    __asm__ volatile (
+        "test             %%"REG_a", %%"REG_a"          \n\t"
+        "jns                     2f                     \n\t" // fewer than 24 bytes: skip the SIMD loop
+        "movq     "MANGLE(mask24r)", %%mm5              \n\t"
+        "movq     "MANGLE(mask24g)", %%mm6              \n\t"
+        "movq     "MANGLE(mask24b)", %%mm7              \n\t"
+        ".p2align                 4                     \n\t"
+        "1:                                             \n\t" // each iteration converts 24 bytes (8 pixels)
+        PREFETCH" 32(%1, %%"REG_a")                     \n\t"
+        "movq       (%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
+        "movq       (%1, %%"REG_a"), %%mm1              \n\t" // BGR BGR BG
+        "movq      2(%1, %%"REG_a"), %%mm2              \n\t" // R BGR BGR B
+        "psllq                  $16, %%mm0              \n\t" // 00 BGR BGR
+        "pand                 %%mm5, %%mm0              \n\t"
+        "pand                 %%mm6, %%mm1              \n\t"
+        "pand                 %%mm7, %%mm2              \n\t"
+        "por                  %%mm0, %%mm1              \n\t"
+        "por                  %%mm2, %%mm1              \n\t"
+        "movq      6(%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
+        MOVNTQ"               %%mm1,   (%2, %%"REG_a")  \n\t" // RGB RGB RG
+        "movq      8(%1, %%"REG_a"), %%mm1              \n\t" // R BGR BGR B
+        "movq     10(%1, %%"REG_a"), %%mm2              \n\t" // GR BGR BGR
+        "pand                 %%mm7, %%mm0              \n\t"
+        "pand                 %%mm5, %%mm1              \n\t"
+        "pand                 %%mm6, %%mm2              \n\t"
+        "por                  %%mm0, %%mm1              \n\t"
+        "por                  %%mm2, %%mm1              \n\t"
+        "movq     14(%1, %%"REG_a"), %%mm0              \n\t" // R BGR BGR B
+        MOVNTQ"               %%mm1,  8(%2, %%"REG_a")  \n\t" // B RGB RGB R
+        "movq     16(%1, %%"REG_a"), %%mm1              \n\t" // GR BGR BGR
+        "movq     18(%1, %%"REG_a"), %%mm2              \n\t" // BGR BGR BG
+        "pand                 %%mm6, %%mm0              \n\t"
+        "pand                 %%mm7, %%mm1              \n\t"
+        "pand                 %%mm5, %%mm2              \n\t"
+        "por                  %%mm0, %%mm1              \n\t"
+        "por                  %%mm2, %%mm1              \n\t"
+        MOVNTQ"               %%mm1, 16(%2, %%"REG_a")  \n\t"
+        "add                    $24, %%"REG_a"          \n\t"
+        " js                     1b                     \n\t" // keep going while the counter is still negative
+        "2:                                             \n\t"
+        : "+a" (mmx_size)
+        : "r" (src-mmx_size), "r"(dst-mmx_size) // biased so that base + counter addresses the current byte
+    );
+
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+
+    if (mmx_size==23) return; //finished, src_size was a multiple of 24
+
+    src+= src_size; // move both pointers to the end of the buffers ...
+    dst+= src_size;
+    src_size= 23-mmx_size; // ... compute how many bytes the SIMD loop left over ...
+    src-= src_size; // ... and step back to the first unconverted byte
+    dst-= src_size;
+    for (i=0; i<src_size; i+=3) { // C tail: one 3-byte pixel per iteration
+        register uint8_t x;
+        x          = src[i + 2]; // read before any store of this pixel
+        dst[i + 1] = src[i + 1];
+        dst[i + 2] = src[i + 0];
+        dst[i + 0] = x;
+    }
+}
+
+static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                           long width, long height,
+                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+{ // Interleaves planar Y, U and V into packed YUYV, writing one output line per luma line.
+    long y;
+    const x86_reg chromWidth= width>>1; // loop bound: half the luma width
+    for (y=0; y<height; y++) {
+        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
+        __asm__ volatile(
+            "xor                 %%"REG_a", %%"REG_a"   \n\t"
+            ".p2align                    4              \n\t"
+            "1:                                         \n\t" // each iteration: 8 U + 8 V + 16 Y bytes in, 32 bytes out
+            PREFETCH"    32(%1, %%"REG_a", 2)           \n\t"
+            PREFETCH"    32(%2, %%"REG_a")              \n\t"
+            PREFETCH"    32(%3, %%"REG_a")              \n\t"
+            "movq          (%2, %%"REG_a"), %%mm0       \n\t" // U(0)
+            "movq                    %%mm0, %%mm2       \n\t" // U(0)
+            "movq          (%3, %%"REG_a"), %%mm1       \n\t" // V(0)
+            "punpcklbw               %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
+            "punpckhbw               %%mm1, %%mm2       \n\t" // UVUV UVUV(8)
+
+            "movq        (%1, %%"REG_a",2), %%mm3       \n\t" // Y(0)
+            "movq       8(%1, %%"REG_a",2), %%mm5       \n\t" // Y(8)
+            "movq                    %%mm3, %%mm4       \n\t" // Y(0)
+            "movq                    %%mm5, %%mm6       \n\t" // Y(8)
+            "punpcklbw               %%mm0, %%mm3       \n\t" // YUYV YUYV(0)
+            "punpckhbw               %%mm0, %%mm4       \n\t" // YUYV YUYV(4)
+            "punpcklbw               %%mm2, %%mm5       \n\t" // YUYV YUYV(8)
+            "punpckhbw               %%mm2, %%mm6       \n\t" // YUYV YUYV(12)
+
+            MOVNTQ"                  %%mm3,   (%0, %%"REG_a", 4)    \n\t"
+            MOVNTQ"                  %%mm4,  8(%0, %%"REG_a", 4)    \n\t"
+            MOVNTQ"                  %%mm5, 16(%0, %%"REG_a", 4)    \n\t"
+            MOVNTQ"                  %%mm6, 24(%0, %%"REG_a", 4)    \n\t"
+
+            "add                        $8, %%"REG_a"   \n\t"
+            "cmp                        %4, %%"REG_a"   \n\t"
+            " jb                        1b              \n\t" // unsigned compare; the body always runs at least once per line
+            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
+            : "%"REG_a // NOTE(review): no "memory" clobber although the asm stores through %0
+        );
+        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { // true on the last luma line of each group; mask test presumably expects a power of two
+            usrc += chromStride;
+            vsrc += chromStride;
+        }
+        ysrc += lumStride;
+        dst  += dstStride;
+    }
+    __asm__(EMMS"       \n\t"
+            SFENCE"     \n\t"
+            :::"memory");
+}
+
+/**
+ * Planar-to-YUY2 wrapper in which two consecutive luma lines share one chroma line.
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ */
+static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long dstStride)
+{
+    const long lumaLinesPerChromaLine = 2; //FIXME interpolate chroma
+    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, lumaLinesPerChromaLine);
+}
+
+static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                           long width, long height,
+                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+{ // Interleaves planar Y, U and V into packed UYVY, writing one output line per luma line.
+    long y;
+    const x86_reg chromWidth= width>>1; // loop bound: half the luma width
+    for (y=0; y<height; y++) {
+        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
+        __asm__ volatile(
+            "xor                %%"REG_a", %%"REG_a"    \n\t"
+            ".p2align                   4               \n\t"
+            "1:                                         \n\t" // each iteration: 8 U + 8 V + 16 Y bytes in, 32 bytes out
+            PREFETCH"   32(%1, %%"REG_a", 2)            \n\t"
+            PREFETCH"   32(%2, %%"REG_a")               \n\t"
+            PREFETCH"   32(%3, %%"REG_a")               \n\t"
+            "movq         (%2, %%"REG_a"), %%mm0        \n\t" // U(0)
+            "movq                   %%mm0, %%mm2        \n\t" // U(0)
+            "movq         (%3, %%"REG_a"), %%mm1        \n\t" // V(0)
+            "punpcklbw              %%mm1, %%mm0        \n\t" // UVUV UVUV(0)
+            "punpckhbw              %%mm1, %%mm2        \n\t" // UVUV UVUV(8)
+
+            "movq       (%1, %%"REG_a",2), %%mm3        \n\t" // Y(0)
+            "movq      8(%1, %%"REG_a",2), %%mm5        \n\t" // Y(8)
+            "movq                   %%mm0, %%mm4        \n\t" // UVUV UVUV(0)
+            "movq                   %%mm2, %%mm6        \n\t" // UVUV UVUV(8)
+            "punpcklbw              %%mm3, %%mm0        \n\t" // UYVY UYVY(0)
+            "punpckhbw              %%mm3, %%mm4        \n\t" // UYVY UYVY(4)
+            "punpcklbw              %%mm5, %%mm2        \n\t" // UYVY UYVY(8)
+            "punpckhbw              %%mm5, %%mm6        \n\t" // UYVY UYVY(12)
+
+            MOVNTQ"                 %%mm0,   (%0, %%"REG_a", 4)     \n\t"
+            MOVNTQ"                 %%mm4,  8(%0, %%"REG_a", 4)     \n\t"
+            MOVNTQ"                 %%mm2, 16(%0, %%"REG_a", 4)     \n\t"
+            MOVNTQ"                 %%mm6, 24(%0, %%"REG_a", 4)     \n\t"
+
+            "add                       $8, %%"REG_a"    \n\t"
+            "cmp                       %4, %%"REG_a"    \n\t"
+            " jb                       1b               \n\t" // unsigned compare; the body always runs at least once per line
+            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
+            : "%"REG_a // NOTE(review): no "memory" clobber although the asm stores through %0
+        );
+        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { // true on the last luma line of each group; mask test presumably expects a power of two
+            usrc += chromStride;
+            vsrc += chromStride;
+        }
+        ysrc += lumStride;
+        dst += dstStride;
+    }
+    __asm__(EMMS"       \n\t"
+            SFENCE"     \n\t"
+            :::"memory");
+}
+
+/**
+ * Planar-to-UYVY wrapper in which two consecutive luma lines share one chroma line.
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ */
+static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long dstStride)
+{
+    const long lumaLinesPerChromaLine = 2; //FIXME interpolate chroma
+    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, lumaLinesPerChromaLine);
+}
+
+/** Planar-to-UYVY wrapper in which every luma line has its own chroma line.
+ *  Width should be a multiple of 16. */
+static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                         long width, long height,
+                                         long lumStride, long chromStride, long dstStride)
+{
+    const long lumaLinesPerChromaLine = 1;
+    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, lumaLinesPerChromaLine);
+}
+
+/** Planar-to-YUY2 wrapper in which every luma line has its own chroma line.
+ *  Width should be a multiple of 16. */
+static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                         long width, long height,
+                                         long lumStride, long chromStride, long dstStride)
+{
+    const long lumaLinesPerChromaLine = 1;
+    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, lumaLinesPerChromaLine);
+}
+
+/**
+ * Splits packed YUYV into Y, U and V planes; U/V come from the first line of each line pair only.
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ */
+static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const x86_reg chromWidth= width>>1; // loop bound: half the luma width
+    for (y=0; y<height; y+=2) { // two source lines per iteration
+        __asm__ volatile( // first line of the pair: extract Y, U and V
+            "xor                 %%"REG_a", %%"REG_a"   \n\t"
+            "pcmpeqw                 %%mm7, %%mm7       \n\t"
+            "psrlw                      $8, %%mm7       \n\t" // FF,00,FF,00...
+            ".p2align                    4              \n\t"
+            "1:                \n\t"
+            PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
+            "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
+            "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
+            "movq                    %%mm0, %%mm2       \n\t" // YUYV YUYV(0)
+            "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(4)
+            "psrlw                      $8, %%mm0       \n\t" // U0V0 U0V0(0)
+            "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(4)
+            "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(0)
+            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(4)
+            "packuswb                %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
+            "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(0)
+
+            MOVNTQ"                  %%mm2, (%1, %%"REG_a", 2)  \n\t"
+
+            "movq     16(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8)
+            "movq     24(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12)
+            "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(8)
+            "movq                    %%mm2, %%mm4       \n\t" // YUYV YUYV(12)
+            "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(8)
+            "psrlw                      $8, %%mm2       \n\t" // U0V0 U0V0(12)
+            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(8)
+            "pand                    %%mm7, %%mm4       \n\t" // Y0Y0 Y0Y0(12)
+            "packuswb                %%mm2, %%mm1       \n\t" // UVUV UVUV(8)
+            "packuswb                %%mm4, %%mm3       \n\t" // YYYY YYYY(8)
+
+            MOVNTQ"                  %%mm3, 8(%1, %%"REG_a", 2) \n\t"
+
+            "movq                    %%mm0, %%mm2       \n\t" // UVUV UVUV(0)
+            "movq                    %%mm1, %%mm3       \n\t" // UVUV UVUV(8)
+            "psrlw                      $8, %%mm0       \n\t" // V0V0 V0V0(0)
+            "psrlw                      $8, %%mm1       \n\t" // V0V0 V0V0(8)
+            "pand                    %%mm7, %%mm2       \n\t" // U0U0 U0U0(0)
+            "pand                    %%mm7, %%mm3       \n\t" // U0U0 U0U0(8)
+            "packuswb                %%mm1, %%mm0       \n\t" // VVVV VVVV(0)
+            "packuswb                %%mm3, %%mm2       \n\t" // UUUU UUUU(0)
+
+            MOVNTQ"                  %%mm0, (%3, %%"REG_a")     \n\t"
+            MOVNTQ"                  %%mm2, (%2, %%"REG_a")     \n\t"
+
+            "add                        $8, %%"REG_a"   \n\t"
+            "cmp                        %4, %%"REG_a"   \n\t"
+            " jb                        1b              \n\t" // unsigned compare; the body always runs at least once per line
+            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
+            : "memory", "%"REG_a
+        );
+
+        ydst += lumStride;
+        src  += srcStride;
+
+        __asm__ volatile( // second line of the pair: only Y is stored; reuses the mask left in mm7 by the asm above
+            "xor                 %%"REG_a", %%"REG_a"   \n\t"
+            ".p2align                    4              \n\t"
+            "1:                                         \n\t"
+            PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
+            "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
+            "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
+            "movq     16(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8)
+            "movq     24(%0, %%"REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12)
+            "pand                    %%mm7, %%mm0       \n\t" // Y0Y0 Y0Y0(0)
+            "pand                    %%mm7, %%mm1       \n\t" // Y0Y0 Y0Y0(4)
+            "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(8)
+            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(12)
+            "packuswb                %%mm1, %%mm0       \n\t" // YYYY YYYY(0)
+            "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(8)
+
+            MOVNTQ"                  %%mm0,  (%1, %%"REG_a", 2) \n\t"
+            MOVNTQ"                  %%mm2, 8(%1, %%"REG_a", 2) \n\t"
+
+            "add                        $8, %%"REG_a"   \n\t"
+            "cmp                        %4, %%"REG_a"   \n\t"
+            " jb                        1b              \n\t"
+
+            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
+            : "memory", "%"REG_a
+        );
+        udst += chromStride; // one chroma line per two source lines
+        vdst += chromStride;
+        ydst += lumStride;
+        src  += srcStride;
+    }
+    __asm__ volatile(EMMS"       \n\t"
+                     SFENCE"     \n\t"
+                     :::"memory");
+}
+
+/**
+ * Upscale one plane by a factor of 2 in both directions: srcWidth x srcHeight
+ * samples from src become (2*srcWidth) x (2*srcHeight) samples in dst.
+ * Output samples are 3:1 weighted averages of two neighbouring input samples;
+ * the outermost samples of the first and last output line are copied.
+ * The SIMD pass needs at least 16 samples per row; narrower rows are
+ * handled entirely by the C code.
+ */
+static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
+{
+    long x,y;
+
+    dst[0]= src[0];
+
+    // first line
+    for (x=0; x<srcWidth-1; x++) {
+        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
+        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
+    }
+    dst[2*srcWidth-1]= src[srcWidth-1];
+
+    dst+= dstStride;
+
+    for (y=1; y<srcHeight; y++) {
+        x86_reg mmxSize= 0; // number of leading columns done by the SIMD loop; 0 = none
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+        mmxSize= srcWidth&~15;
+        /* The asm loop has no entry test and always runs once, so it must be
+         * skipped for rows narrower than 16 samples. */
+        if (mmxSize) {
+        __asm__ volatile(
+            "mov           %4, %%"REG_a"            \n\t"
+            "movq        "MANGLE(mmx_ff)", %%mm0    \n\t"
+            "movq         (%0, %%"REG_a"), %%mm4    \n\t"
+            "movq                   %%mm4, %%mm2    \n\t"
+            "psllq                     $8, %%mm4    \n\t"
+            "pand                   %%mm0, %%mm2    \n\t"
+            "por                    %%mm2, %%mm4    \n\t"
+            "movq         (%1, %%"REG_a"), %%mm5    \n\t"
+            "movq                   %%mm5, %%mm3    \n\t"
+            "psllq                     $8, %%mm5    \n\t"
+            "pand                   %%mm0, %%mm3    \n\t"
+            "por                    %%mm3, %%mm5    \n\t"
+            "1:                                     \n\t"
+            "movq         (%0, %%"REG_a"), %%mm0    \n\t"
+            "movq         (%1, %%"REG_a"), %%mm1    \n\t"
+            "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
+            "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
+            PAVGB"                  %%mm0, %%mm5    \n\t"
+            PAVGB"                  %%mm0, %%mm3    \n\t"
+            PAVGB"                  %%mm0, %%mm5    \n\t"
+            PAVGB"                  %%mm0, %%mm3    \n\t"
+            PAVGB"                  %%mm1, %%mm4    \n\t"
+            PAVGB"                  %%mm1, %%mm2    \n\t"
+            PAVGB"                  %%mm1, %%mm4    \n\t"
+            PAVGB"                  %%mm1, %%mm2    \n\t"
+            "movq                   %%mm5, %%mm7    \n\t"
+            "movq                   %%mm4, %%mm6    \n\t"
+            "punpcklbw              %%mm3, %%mm5    \n\t"
+            "punpckhbw              %%mm3, %%mm7    \n\t"
+            "punpcklbw              %%mm2, %%mm4    \n\t"
+            "punpckhbw              %%mm2, %%mm6    \n\t"
+            MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
+            MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
+            MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
+            MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
+            "add                       $8, %%"REG_a"            \n\t"
+            "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
+            "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
+            " js                       1b                       \n\t"
+            :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
+               "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
+               "g" (-mmxSize)
+            : "%"REG_a
+        );
+        }
+#endif
+        if (!mmxSize) {
+            // no SIMD pass: do the leftmost column here so that the loop below starts at x=0
+            mmxSize= 1;
+            dst[0        ]= (3*src[0] +   src[srcStride])>>2;
+            dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
+        }
+
+        for (x=mmxSize-1; x<srcWidth-1; x++) {
+            dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
+            dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
+            dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
+            dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
+        }
+        dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
+        dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
+
+        dst+=dstStride*2;
+        src+=srcStride;
+    }
+
+    // last line
+    dst[0]= src[0];
+
+    for (x=0; x<srcWidth-1; x++) {
+        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
+        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
+    }
+    dst[2*srcWidth-1]= src[srcWidth-1];
+
+    __asm__ volatile(EMMS"       \n\t"
+                     SFENCE"     \n\t"
+                     :::"memory");
+}
+
+/**
+ * Splits packed UYVY into separate Y, U and V planes.
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * Chrominance data is only taken from every second line, others are ignored.
+ * FIXME: Write HQ version.
+ */
+static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const x86_reg chromWidth= width>>1; // loop bound: half the luma width
+    for (y=0; y<height; y+=2) { // two source lines per iteration
+        __asm__ volatile( // first line of the pair: extract Y, U and V
+            "xor                 %%"REG_a", %%"REG_a"   \n\t"
+            "pcmpeqw             %%mm7, %%mm7   \n\t"
+            "psrlw                  $8, %%mm7   \n\t" // FF,00,FF,00...
+            ".p2align                4          \n\t"
+            "1:                                 \n\t"
+            PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
+            "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
+            "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
+            "movq                %%mm0, %%mm2   \n\t" // UYVY UYVY(0)
+            "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(4)
+            "pand                %%mm7, %%mm0   \n\t" // U0V0 U0V0(0)
+            "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(4)
+            "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(0)
+            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(4)
+            "packuswb            %%mm1, %%mm0   \n\t" // UVUV UVUV(0)
+            "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(0)
+
+            MOVNTQ"              %%mm2,  (%1, %%"REG_a", 2) \n\t"
+
+            "movq     16(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8)
+            "movq     24(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12)
+            "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(8)
+            "movq                %%mm2, %%mm4   \n\t" // UYVY UYVY(12)
+            "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(8)
+            "pand                %%mm7, %%mm2   \n\t" // U0V0 U0V0(12)
+            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(8)
+            "psrlw                  $8, %%mm4   \n\t" // Y0Y0 Y0Y0(12)
+            "packuswb            %%mm2, %%mm1   \n\t" // UVUV UVUV(8)
+            "packuswb            %%mm4, %%mm3   \n\t" // YYYY YYYY(8)
+
+            MOVNTQ"              %%mm3, 8(%1, %%"REG_a", 2) \n\t"
+
+            "movq                %%mm0, %%mm2   \n\t" // UVUV UVUV(0)
+            "movq                %%mm1, %%mm3   \n\t" // UVUV UVUV(8)
+            "psrlw                  $8, %%mm0   \n\t" // V0V0 V0V0(0)
+            "psrlw                  $8, %%mm1   \n\t" // V0V0 V0V0(8)
+            "pand                %%mm7, %%mm2   \n\t" // U0U0 U0U0(0)
+            "pand                %%mm7, %%mm3   \n\t" // U0U0 U0U0(8)
+            "packuswb            %%mm1, %%mm0   \n\t" // VVVV VVVV(0)
+            "packuswb            %%mm3, %%mm2   \n\t" // UUUU UUUU(0)
+
+            MOVNTQ"              %%mm0, (%3, %%"REG_a") \n\t"
+            MOVNTQ"              %%mm2, (%2, %%"REG_a") \n\t"
+
+            "add                    $8, %%"REG_a"   \n\t"
+            "cmp                    %4, %%"REG_a"   \n\t"
+            " jb                    1b          \n\t" // unsigned compare; the body always runs at least once per line
+            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
+            : "memory", "%"REG_a
+        );
+
+        ydst += lumStride;
+        src  += srcStride;
+
+        __asm__ volatile( // second line of the pair: only Y is stored, the chroma bytes are shifted out
+            "xor                 %%"REG_a", %%"REG_a"   \n\t"
+            ".p2align                    4              \n\t"
+            "1:                                 \n\t"
+            PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
+            "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
+            "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
+            "movq     16(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(8)
+            "movq     24(%0, %%"REG_a", 4), %%mm3   \n\t" // UYVY UYVY(12)
+            "psrlw                  $8, %%mm0   \n\t" // Y0Y0 Y0Y0(0)
+            "psrlw                  $8, %%mm1   \n\t" // Y0Y0 Y0Y0(4)
+            "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(8)
+            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(12)
+            "packuswb            %%mm1, %%mm0   \n\t" // YYYY YYYY(0)
+            "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(8)
+
+            MOVNTQ"              %%mm0,  (%1, %%"REG_a", 2) \n\t"
+            MOVNTQ"              %%mm2, 8(%1, %%"REG_a", 2) \n\t"
+
+            "add                    $8, %%"REG_a"   \n\t"
+            "cmp                    %4, %%"REG_a"   \n\t"
+            " jb                    1b          \n\t"
+
+            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
+            : "memory", "%"REG_a
+        );
+        udst += chromStride; // one chroma line per two source lines
+        vdst += chromStride;
+        ydst += lumStride;
+        src  += srcStride;
+    }
+    __asm__ volatile(EMMS"       \n\t"
+                     SFENCE"     \n\t"
+                     :::"memory");
+}
+
+/**
+ * Height should be a multiple of 2 and width should be a multiple of 2.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ * Chrominance data is only taken from every second line,
+ * others are ignored in the C version.
+ * FIXME: Write HQ version.
+ */
+static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                                       long width, long height,
+                                       long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const x86_reg chromWidth= width>>1;
+    for (y=0; y<height-2; y+=2) {
+        long i;
+        for (i=0; i<2; i++) {
+            __asm__ volatile(
+                "mov                        %2, %%"REG_a"   \n\t"
+                "movq  "MANGLE(ff_bgr2YCoeff)", %%mm6       \n\t"
+                "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
+                "pxor                    %%mm7, %%mm7       \n\t"
+                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
+                ".p2align                    4              \n\t"
+                "1:                                         \n\t"
+                PREFETCH"    64(%0, %%"REG_d")              \n\t"
+                "movd          (%0, %%"REG_d"), %%mm0       \n\t"
+                "movd         3(%0, %%"REG_d"), %%mm1       \n\t"
+                "punpcklbw               %%mm7, %%mm0       \n\t"
+                "punpcklbw               %%mm7, %%mm1       \n\t"
+                "movd         6(%0, %%"REG_d"), %%mm2       \n\t"
+                "movd         9(%0, %%"REG_d"), %%mm3       \n\t"
+                "punpcklbw               %%mm7, %%mm2       \n\t"
+                "punpcklbw               %%mm7, %%mm3       \n\t"
+                "pmaddwd                 %%mm6, %%mm0       \n\t"
+                "pmaddwd                 %%mm6, %%mm1       \n\t"
+                "pmaddwd                 %%mm6, %%mm2       \n\t"
+                "pmaddwd                 %%mm6, %%mm3       \n\t"
+#ifndef FAST_BGR2YV12
+                "psrad                      $8, %%mm0       \n\t"
+                "psrad                      $8, %%mm1       \n\t"
+                "psrad                      $8, %%mm2       \n\t"
+                "psrad                      $8, %%mm3       \n\t"
+#endif
+                "packssdw                %%mm1, %%mm0       \n\t"
+                "packssdw                %%mm3, %%mm2       \n\t"
+                "pmaddwd                 %%mm5, %%mm0       \n\t"
+                "pmaddwd                 %%mm5, %%mm2       \n\t"
+                "packssdw                %%mm2, %%mm0       \n\t"
+                "psraw                      $7, %%mm0       \n\t"
+
+                "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
+                "movd        15(%0, %%"REG_d"), %%mm1       \n\t"
+                "punpcklbw               %%mm7, %%mm4       \n\t"
+                "punpcklbw               %%mm7, %%mm1       \n\t"
+                "movd        18(%0, %%"REG_d"), %%mm2       \n\t"
+                "movd        21(%0, %%"REG_d"), %%mm3       \n\t"
+                "punpcklbw               %%mm7, %%mm2       \n\t"
+                "punpcklbw               %%mm7, %%mm3       \n\t"
+                "pmaddwd                 %%mm6, %%mm4       \n\t"
+                "pmaddwd                 %%mm6, %%mm1       \n\t"
+                "pmaddwd                 %%mm6, %%mm2       \n\t"
+                "pmaddwd                 %%mm6, %%mm3       \n\t"
+#ifndef FAST_BGR2YV12
+                "psrad                      $8, %%mm4       \n\t"
+                "psrad                      $8, %%mm1       \n\t"
+                "psrad                      $8, %%mm2       \n\t"
+                "psrad                      $8, %%mm3       \n\t"
+#endif
+                "packssdw                %%mm1, %%mm4       \n\t"
+                "packssdw                %%mm3, %%mm2       \n\t"
+                "pmaddwd                 %%mm5, %%mm4       \n\t"
+                "pmaddwd                 %%mm5, %%mm2       \n\t"
+                "add                       $24, %%"REG_d"   \n\t"
+                "packssdw                %%mm2, %%mm4       \n\t"
+                "psraw                      $7, %%mm4       \n\t"
+
+                "packuswb                %%mm4, %%mm0       \n\t"
+                "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t"
+
+                MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
+                "add                        $8,      %%"REG_a"  \n\t"
+                " js                        1b                  \n\t"
+                : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
+                : "%"REG_a, "%"REG_d
+            );
+            ydst += lumStride;
+            src  += srcStride;
+        }
+        src -= srcStride*2;
+        __asm__ volatile(
+            "mov                        %4, %%"REG_a"   \n\t"
+            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
+            "movq  "MANGLE(ff_bgr2UCoeff)", %%mm6       \n\t"
+            "pxor                    %%mm7, %%mm7       \n\t"
+            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
+            "add                 %%"REG_d", %%"REG_d"   \n\t"
+            ".p2align                    4              \n\t"
+            "1:                                         \n\t"
+            PREFETCH"    64(%0, %%"REG_d")              \n\t"
+            PREFETCH"    64(%1, %%"REG_d")              \n\t"
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+            "movq          (%0, %%"REG_d"), %%mm0       \n\t"
+            "movq          (%1, %%"REG_d"), %%mm1       \n\t"
+            "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
+            "movq         6(%1, %%"REG_d"), %%mm3       \n\t"
+            PAVGB"                   %%mm1, %%mm0       \n\t"
+            PAVGB"                   %%mm3, %%mm2       \n\t"
+            "movq                    %%mm0, %%mm1       \n\t"
+            "movq                    %%mm2, %%mm3       \n\t"
+            "psrlq                     $24, %%mm0       \n\t"
+            "psrlq                     $24, %%mm2       \n\t"
+            PAVGB"                   %%mm1, %%mm0       \n\t"
+            PAVGB"                   %%mm3, %%mm2       \n\t"
+            "punpcklbw               %%mm7, %%mm0       \n\t"
+            "punpcklbw               %%mm7, %%mm2       \n\t"
+#else
+            "movd          (%0, %%"REG_d"), %%mm0       \n\t"
+            "movd          (%1, %%"REG_d"), %%mm1       \n\t"
+            "movd         3(%0, %%"REG_d"), %%mm2       \n\t"
+            "movd         3(%1, %%"REG_d"), %%mm3       \n\t"
+            "punpcklbw               %%mm7, %%mm0       \n\t"
+            "punpcklbw               %%mm7, %%mm1       \n\t"
+            "punpcklbw               %%mm7, %%mm2       \n\t"
+            "punpcklbw               %%mm7, %%mm3       \n\t"
+            "paddw                   %%mm1, %%mm0       \n\t"
+            "paddw                   %%mm3, %%mm2       \n\t"
+            "paddw                   %%mm2, %%mm0       \n\t"
+            "movd         6(%0, %%"REG_d"), %%mm4       \n\t"
+            "movd         6(%1, %%"REG_d"), %%mm1       \n\t"
+            "movd         9(%0, %%"REG_d"), %%mm2       \n\t"
+            "movd         9(%1, %%"REG_d"), %%mm3       \n\t"
+            "punpcklbw               %%mm7, %%mm4       \n\t"
+            "punpcklbw               %%mm7, %%mm1       \n\t"
+            "punpcklbw               %%mm7, %%mm2       \n\t"
+            "punpcklbw               %%mm7, %%mm3       \n\t"
+            "paddw                   %%mm1, %%mm4       \n\t"
+            "paddw                   %%mm3, %%mm2       \n\t"
+            "paddw                   %%mm4, %%mm2       \n\t"
+            "psrlw                      $2, %%mm0       \n\t"
+            "psrlw                      $2, %%mm2       \n\t"
+#endif
+            "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
+            "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
+
+            "pmaddwd                 %%mm0, %%mm1       \n\t"
+            "pmaddwd                 %%mm2, %%mm3       \n\t"
+            "pmaddwd                 %%mm6, %%mm0       \n\t"
+            "pmaddwd                 %%mm6, %%mm2       \n\t"
+#ifndef FAST_BGR2YV12
+            "psrad                      $8, %%mm0       \n\t"
+            "psrad                      $8, %%mm1       \n\t"
+            "psrad                      $8, %%mm2       \n\t"
+            "psrad                      $8, %%mm3       \n\t"
+#endif
+            "packssdw                %%mm2, %%mm0       \n\t"
+            "packssdw                %%mm3, %%mm1       \n\t"
+            "pmaddwd                 %%mm5, %%mm0       \n\t"
+            "pmaddwd                 %%mm5, %%mm1       \n\t"
+            "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
+            "psraw                      $7, %%mm0       \n\t"
+
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+            "movq        12(%0, %%"REG_d"), %%mm4       \n\t"
+            "movq        12(%1, %%"REG_d"), %%mm1       \n\t"
+            "movq        18(%0, %%"REG_d"), %%mm2       \n\t"
+            "movq        18(%1, %%"REG_d"), %%mm3       \n\t"
+            PAVGB"                   %%mm1, %%mm4       \n\t"
+            PAVGB"                   %%mm3, %%mm2       \n\t"
+            "movq                    %%mm4, %%mm1       \n\t"
+            "movq                    %%mm2, %%mm3       \n\t"
+            "psrlq                     $24, %%mm4       \n\t"
+            "psrlq                     $24, %%mm2       \n\t"
+            PAVGB"                   %%mm1, %%mm4       \n\t"
+            PAVGB"                   %%mm3, %%mm2       \n\t"
+            "punpcklbw               %%mm7, %%mm4       \n\t"
+            "punpcklbw               %%mm7, %%mm2       \n\t"
+#else
+            "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
+            "movd        12(%1, %%"REG_d"), %%mm1       \n\t"
+            "movd        15(%0, %%"REG_d"), %%mm2       \n\t"
+            "movd        15(%1, %%"REG_d"), %%mm3       \n\t"
+            "punpcklbw               %%mm7, %%mm4       \n\t"
+            "punpcklbw               %%mm7, %%mm1       \n\t"
+            "punpcklbw               %%mm7, %%mm2       \n\t"
+            "punpcklbw               %%mm7, %%mm3       \n\t"
+            "paddw                   %%mm1, %%mm4       \n\t"
+            "paddw                   %%mm3, %%mm2       \n\t"
+            "paddw                   %%mm2, %%mm4       \n\t"
+            "movd        18(%0, %%"REG_d"), %%mm5       \n\t"
+            "movd        18(%1, %%"REG_d"), %%mm1       \n\t"
+            "movd        21(%0, %%"REG_d"), %%mm2       \n\t"
+            "movd        21(%1, %%"REG_d"), %%mm3       \n\t"
+            "punpcklbw               %%mm7, %%mm5       \n\t"
+            "punpcklbw               %%mm7, %%mm1       \n\t"
+            "punpcklbw               %%mm7, %%mm2       \n\t"
+            "punpcklbw               %%mm7, %%mm3       \n\t"
+            "paddw                   %%mm1, %%mm5       \n\t"
+            "paddw                   %%mm3, %%mm2       \n\t"
+            "paddw                   %%mm5, %%mm2       \n\t"
+            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
+            "psrlw                      $2, %%mm4       \n\t"
+            "psrlw                      $2, %%mm2       \n\t"
+#endif
+            "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
+            "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
+
+            "pmaddwd                 %%mm4, %%mm1       \n\t"
+            "pmaddwd                 %%mm2, %%mm3       \n\t"
+            "pmaddwd                 %%mm6, %%mm4       \n\t"
+            "pmaddwd                 %%mm6, %%mm2       \n\t"
+#ifndef FAST_BGR2YV12
+            "psrad                      $8, %%mm4       \n\t"
+            "psrad                      $8, %%mm1       \n\t"
+            "psrad                      $8, %%mm2       \n\t"
+            "psrad                      $8, %%mm3       \n\t"
+#endif
+            "packssdw                %%mm2, %%mm4       \n\t"
+            "packssdw                %%mm3, %%mm1       \n\t"
+            "pmaddwd                 %%mm5, %%mm4       \n\t"
+            "pmaddwd                 %%mm5, %%mm1       \n\t"
+            "add                       $24, %%"REG_d"   \n\t"
+            "packssdw                %%mm1, %%mm4       \n\t" // V3 V2 U3 U2
+            "psraw                      $7, %%mm4       \n\t"
+
+            "movq                    %%mm0, %%mm1           \n\t"
+            "punpckldq               %%mm4, %%mm0           \n\t"
+            "punpckhdq               %%mm4, %%mm1           \n\t"
+            "packsswb                %%mm1, %%mm0           \n\t"
+            "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0         \n\t"
+            "movd                    %%mm0, (%2, %%"REG_a") \n\t"
+            "punpckhdq               %%mm0, %%mm0           \n\t"
+            "movd                    %%mm0, (%3, %%"REG_a") \n\t"
+            "add                        $4, %%"REG_a"       \n\t"
+            " js                        1b                  \n\t"
+            : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
+            : "%"REG_a, "%"REG_d
+        );
+
+        udst += chromStride;
+        vdst += chromStride;
+        src  += srcStride*2;
+    }
+
+    __asm__ volatile(EMMS"       \n\t"
+                     SFENCE"     \n\t"
+                     :::"memory");
+
+    for (; y<height; y+=2) {
+        long i;
+        for (i=0; i<chromWidth; i++) {
+            unsigned int b = src[6*i+0];
+            unsigned int g = src[6*i+1];
+            unsigned int r = src[6*i+2];
+
+            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+            unsigned int V  =  ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
+            unsigned int U  =  ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
+
+            udst[i]     = U;
+            vdst[i]     = V;
+            ydst[2*i]   = Y;
+
+            b = src[6*i+3];
+            g = src[6*i+4];
+            r = src[6*i+5];
+
+            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+            ydst[2*i+1]     = Y;
+        }
+        ydst += lumStride;
+        src  += srcStride;
+
+        for (i=0; i<chromWidth; i++) {
+            unsigned int b = src[6*i+0];
+            unsigned int g = src[6*i+1];
+            unsigned int r = src[6*i+2];
+
+            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+
+            ydst[2*i]     = Y;
+
+            b = src[6*i+3];
+            g = src[6*i+4];
+            r = src[6*i+5];
+
+            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+            ydst[2*i+1]     = Y;
+        }
+        udst += chromStride;
+        vdst += chromStride;
+        ydst += lumStride;
+        src  += srcStride;
+    }
+}
+
+/**
+ * Interleave two byte planes into one:
+ * dest[2*w] = src1[w] and dest[2*w+1] = src2[w] for every w in [0, width).
+ *
+ * Rows are processed one at a time and each pointer is advanced by its own
+ * stride after every row.
+ *
+ * The SIMD loops consume 16 source bytes per iteration and execute their body
+ * once before testing the loop bound, so they are only entered when
+ * width >= 16. The scalar loop then finishes the remaining width % 16 bytes
+ * (or the whole row when it is shorter than 16 bytes).
+ *
+ * The SSE2 loop uses movdqa/movntdq, which need 16-byte aligned addresses, so
+ * it is only taken when src1, src2 and dest are all 16-byte aligned for the
+ * current row; otherwise the MMX loop is used.
+ *
+ * @param src1       plane whose bytes go to the even output positions
+ * @param src2       plane whose bytes go to the odd output positions
+ * @param dest       output buffer, 2*width bytes are written per row
+ * @param width      number of bytes taken from each source row
+ * @param height     number of rows
+ * @param src1Stride byte distance between rows of src1
+ * @param src2Stride byte distance between rows of src2
+ * @param dstStride  byte distance between rows of dest
+ */
+static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
+                                    long width, long height, long src1Stride,
+                                    long src2Stride, long dstStride)
+{
+    long h;
+
+    for (h=0; h < height; h++) {
+        long w;
+
+        /* The asm loops are do-while shaped: never enter them for short rows. */
+        if (width >= 16) {
+#if COMPILE_TEMPLATE_SSE2
+            if (!(((intptr_t)src1 | (intptr_t)src2 | (intptr_t)dest) & 15)) {
+                /* All three row pointers are 16-byte aligned. */
+                __asm__(
+                    "xor              %%"REG_a", %%"REG_a"  \n\t"
+                    "1:                                     \n\t"
+                    PREFETCH" 64(%1, %%"REG_a")             \n\t"
+                    PREFETCH" 64(%2, %%"REG_a")             \n\t"
+                    "movdqa     (%1, %%"REG_a"), %%xmm0     \n\t"
+                    "movdqa     (%1, %%"REG_a"), %%xmm1     \n\t"
+                    "movdqa     (%2, %%"REG_a"), %%xmm2     \n\t"
+                    "punpcklbw           %%xmm2, %%xmm0     \n\t"
+                    "punpckhbw           %%xmm2, %%xmm1     \n\t"
+                    "movntdq             %%xmm0,   (%0, %%"REG_a", 2)   \n\t"
+                    "movntdq             %%xmm1, 16(%0, %%"REG_a", 2)   \n\t"
+                    "add                    $16, %%"REG_a"  \n\t"
+                    "cmp                     %3, %%"REG_a"  \n\t"
+                    " jb                     1b             \n\t"
+                    ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
+                    : "memory", "%"REG_a""
+                );
+            } else
+#endif
+            {
+                /* MMX loop: no alignment requirement on the loads. */
+                __asm__(
+                    "xor %%"REG_a", %%"REG_a"               \n\t"
+                    "1:                                     \n\t"
+                    PREFETCH" 64(%1, %%"REG_a")             \n\t"
+                    PREFETCH" 64(%2, %%"REG_a")             \n\t"
+                    "movq       (%1, %%"REG_a"), %%mm0      \n\t"
+                    "movq      8(%1, %%"REG_a"), %%mm2      \n\t"
+                    "movq                 %%mm0, %%mm1      \n\t"
+                    "movq                 %%mm2, %%mm3      \n\t"
+                    "movq       (%2, %%"REG_a"), %%mm4      \n\t"
+                    "movq      8(%2, %%"REG_a"), %%mm5      \n\t"
+                    "punpcklbw            %%mm4, %%mm0      \n\t"
+                    "punpckhbw            %%mm4, %%mm1      \n\t"
+                    "punpcklbw            %%mm5, %%mm2      \n\t"
+                    "punpckhbw            %%mm5, %%mm3      \n\t"
+                    MOVNTQ"               %%mm0,   (%0, %%"REG_a", 2)   \n\t"
+                    MOVNTQ"               %%mm1,  8(%0, %%"REG_a", 2)   \n\t"
+                    MOVNTQ"               %%mm2, 16(%0, %%"REG_a", 2)   \n\t"
+                    MOVNTQ"               %%mm3, 24(%0, %%"REG_a", 2)   \n\t"
+                    "add                    $16, %%"REG_a"  \n\t"
+                    "cmp                     %3, %%"REG_a"  \n\t"
+                    " jb                     1b             \n\t"
+                    ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
+                    : "memory", "%"REG_a
+                );
+            }
+        }
+        /* Scalar tail: everything past the last full group of 16 bytes. */
+        for (w= (width&(~15)); w < width; w++) {
+            dest[2*w+0] = src1[w];
+            dest[2*w+1] = src2[w];
+        }
+        dest += dstStride;
+        src1 += src1Stride;
+        src2 += src2Stride;
+    }
+    __asm__(
+            EMMS"       \n\t"
+            SFENCE"     \n\t"
+            ::: "memory"
+            );
+}
+
+/**
+ * Enlarge two byte planes by a factor of two horizontally and vertically.
+ *
+ * For each plane, output row y is built from source row y>>1 (so every source
+ * row is used for two consecutive output rows), and every source byte is
+ * written twice in a row: d[2*x] = d[2*x+1] = s[x].
+ * w = width/2 source bytes are read per row and h = height/2 output rows are
+ * written per plane.
+ *
+ * @param src1       first source plane
+ * @param src2       second source plane
+ * @param dst1       destination for the enlarged src1
+ * @param dst2       destination for the enlarged src2
+ * @param width      twice the number of source bytes read per row
+ * @param height     twice the number of output rows written
+ * @param srcStride1 byte distance between rows of src1
+ * @param srcStride2 byte distance between rows of src2
+ * @param dstStride1 byte distance between rows of dst1
+ * @param dstStride2 byte distance between rows of dst2
+ */
+static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
+                                       uint8_t *dst1, uint8_t *dst2,
+                                       long width, long height,
+                                       long srcStride1, long srcStride2,
+                                       long dstStride1, long dstStride2)
+{
+    x86_reg y;
+    long x,w,h;
+    w=width/2; h=height/2;
+    /* Prefetch the second row of each source plane. */
+    __asm__ volatile(
+        PREFETCH" %0    \n\t"
+        PREFETCH" %1    \n\t"
+        ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
+    /* First plane: src1 -> dst1. */
+    for (y=0;y<h;y++) {
+        const uint8_t* s1=src1+srcStride1*(y>>1);
+        uint8_t* d=dst1+dstStride1*y;
+        x=0;
+        /* 32 source bytes -> 64 output bytes per iteration. The numeric
+         * prefixes in front of %0/%1 are pasted onto the memory operand text
+         * to form a displacement. */
+        for (;x<w-31;x+=32) {
+            __asm__ volatile(
+                PREFETCH"   32%1        \n\t"
+                "movq         %1, %%mm0 \n\t"
+                "movq        8%1, %%mm2 \n\t"
+                "movq       16%1, %%mm4 \n\t"
+                "movq       24%1, %%mm6 \n\t"
+                "movq      %%mm0, %%mm1 \n\t"
+                "movq      %%mm2, %%mm3 \n\t"
+                "movq      %%mm4, %%mm5 \n\t"
+                "movq      %%mm6, %%mm7 \n\t"
+                "punpcklbw %%mm0, %%mm0 \n\t" /* unpacking a register with itself doubles each byte */
+                "punpckhbw %%mm1, %%mm1 \n\t"
+                "punpcklbw %%mm2, %%mm2 \n\t"
+                "punpckhbw %%mm3, %%mm3 \n\t"
+                "punpcklbw %%mm4, %%mm4 \n\t"
+                "punpckhbw %%mm5, %%mm5 \n\t"
+                "punpcklbw %%mm6, %%mm6 \n\t"
+                "punpckhbw %%mm7, %%mm7 \n\t"
+                MOVNTQ"    %%mm0,   %0  \n\t"
+                MOVNTQ"    %%mm1,  8%0  \n\t"
+                MOVNTQ"    %%mm2, 16%0  \n\t"
+                MOVNTQ"    %%mm3, 24%0  \n\t"
+                MOVNTQ"    %%mm4, 32%0  \n\t"
+                MOVNTQ"    %%mm5, 40%0  \n\t"
+                MOVNTQ"    %%mm6, 48%0  \n\t"
+                MOVNTQ"    %%mm7, 56%0"
+                :"=m"(d[2*x])
+                :"m"(s1[x])
+                :"memory");
+        }
+        /* Scalar tail for the last (w % 32) source bytes of the row. */
+        for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
+    }
+    /* Second plane: src2 -> dst2, same procedure. */
+    for (y=0;y<h;y++) {
+        const uint8_t* s2=src2+srcStride2*(y>>1);
+        uint8_t* d=dst2+dstStride2*y;
+        x=0;
+        for (;x<w-31;x+=32) {
+            __asm__ volatile(
+                PREFETCH"   32%1        \n\t"
+                "movq         %1, %%mm0 \n\t"
+                "movq        8%1, %%mm2 \n\t"
+                "movq       16%1, %%mm4 \n\t"
+                "movq       24%1, %%mm6 \n\t"
+                "movq      %%mm0, %%mm1 \n\t"
+                "movq      %%mm2, %%mm3 \n\t"
+                "movq      %%mm4, %%mm5 \n\t"
+                "movq      %%mm6, %%mm7 \n\t"
+                "punpcklbw %%mm0, %%mm0 \n\t"
+                "punpckhbw %%mm1, %%mm1 \n\t"
+                "punpcklbw %%mm2, %%mm2 \n\t"
+                "punpckhbw %%mm3, %%mm3 \n\t"
+                "punpcklbw %%mm4, %%mm4 \n\t"
+                "punpckhbw %%mm5, %%mm5 \n\t"
+                "punpcklbw %%mm6, %%mm6 \n\t"
+                "punpckhbw %%mm7, %%mm7 \n\t"
+                MOVNTQ"    %%mm0,   %0  \n\t"
+                MOVNTQ"    %%mm1,  8%0  \n\t"
+                MOVNTQ"    %%mm2, 16%0  \n\t"
+                MOVNTQ"    %%mm3, 24%0  \n\t"
+                MOVNTQ"    %%mm4, 32%0  \n\t"
+                MOVNTQ"    %%mm5, 40%0  \n\t"
+                MOVNTQ"    %%mm6, 48%0  \n\t"
+                MOVNTQ"    %%mm7, 56%0"
+                :"=m"(d[2*x])
+                :"m"(s2[x])
+                :"memory");
+        }
+        for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
+    }
+    /* Leave MMX state and order the preceding stores. */
+    __asm__(
+            EMMS"       \n\t"
+            SFENCE"     \n\t"
+            ::: "memory"
+        );
+}
+
+/**
+ * Pack three planar inputs into an interleaved Y U Y V byte stream.
+ *
+ * For each row y and each x in [0, width/2) eight output bytes are written:
+ *   d[8x..8x+7] = yp[4x], up[x], yp[4x+1], vp[x], yp[4x+2], up[x], yp[4x+3], vp[x]
+ * The luma row is src1 row y; the chroma rows are src2/src3 row y>>2, so each
+ * chroma row is reused for four consecutive output rows.
+ *
+ * NOTE(review): every x consumes four luma bytes, so one row reads
+ * 4*(width/2) luma bytes and writes 8*(width/2) output bytes -- confirm the
+ * unit `width` is expected in by the callers.
+ *
+ * @param src1       luma plane
+ * @param src2       plane whose bytes land in output positions 1 and 5 of each group
+ * @param src3       plane whose bytes land in output positions 3 and 7 of each group
+ * @param dst        packed output
+ * @param width      see note above; width/2 groups are produced per row
+ * @param height     number of output rows
+ * @param srcStride1 byte distance between rows of src1
+ * @param srcStride2 byte distance between rows of src2
+ * @param srcStride3 byte distance between rows of src3
+ * @param dstStride  byte distance between rows of dst
+ */
+static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
+                                        uint8_t *dst,
+                                        long width, long height,
+                                        long srcStride1, long srcStride2,
+                                        long srcStride3, long dstStride)
+{
+    x86_reg x;
+    long y,w,h;
+    w=width/2; h=height;
+    for (y=0;y<h;y++) {
+        const uint8_t* yp=src1+srcStride1*y;
+        const uint8_t* up=src2+srcStride2*(y>>2);
+        const uint8_t* vp=src3+srcStride3*(y>>2);
+        uint8_t* d=dst+dstStride*y;
+        x=0;
+        /* Eight groups per iteration: 32 luma + 8 U + 8 V bytes in,
+         * 64 packed bytes out. */
+        for (;x<w-7;x+=8) {
+            __asm__ volatile(
+                PREFETCH"   32(%1, %0)          \n\t"
+                PREFETCH"   32(%2, %0)          \n\t"
+                PREFETCH"   32(%3, %0)          \n\t"
+                "movq      (%1, %0, 4), %%mm0   \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
+                "movq         (%2, %0), %%mm1   \n\t" /* U0U1U2U3U4U5U6U7 */
+                "movq         (%3, %0), %%mm2   \n\t" /* V0V1V2V3V4V5V6V7 */
+                "movq            %%mm0, %%mm3   \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
+                "movq            %%mm1, %%mm4   \n\t" /* U0U1U2U3U4U5U6U7 */
+                "movq            %%mm2, %%mm5   \n\t" /* V0V1V2V3V4V5V6V7 */
+                "punpcklbw       %%mm1, %%mm1   \n\t" /* U0U0 U1U1 U2U2 U3U3 */
+                "punpcklbw       %%mm2, %%mm2   \n\t" /* V0V0 V1V1 V2V2 V3V3 */
+                "punpckhbw       %%mm4, %%mm4   \n\t" /* U4U4 U5U5 U6U6 U7U7 */
+                "punpckhbw       %%mm5, %%mm5   \n\t" /* V4V4 V5V5 V6V6 V7V7 */
+
+                /* groups 0 and 1 */
+                "movq            %%mm1, %%mm6   \n\t"
+                "punpcklbw       %%mm2, %%mm1   \n\t" /* U0V0 U0V0 U1V1 U1V1*/
+                "punpcklbw       %%mm1, %%mm0   \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
+                "punpckhbw       %%mm1, %%mm3   \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
+                MOVNTQ"          %%mm0,  (%4, %0, 8)    \n\t"
+                MOVNTQ"          %%mm3, 8(%4, %0, 8)    \n\t"
+
+                /* groups 2 and 3 */
+                "punpckhbw       %%mm2, %%mm6   \n\t" /* U2V2 U2V2 U3V3 U3V3*/
+                "movq     8(%1, %0, 4), %%mm0   \n\t"
+                "movq            %%mm0, %%mm3   \n\t"
+                "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U2 Y V2 Y U2 Y V2*/
+                "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U3 Y V3 Y U3 Y V3*/
+                MOVNTQ"          %%mm0, 16(%4, %0, 8)   \n\t"
+                MOVNTQ"          %%mm3, 24(%4, %0, 8)   \n\t"
+
+                /* groups 4 and 5 */
+                "movq            %%mm4, %%mm6   \n\t"
+                "movq    16(%1, %0, 4), %%mm0   \n\t"
+                "movq            %%mm0, %%mm3   \n\t"
+                "punpcklbw       %%mm5, %%mm4   \n\t"
+                "punpcklbw       %%mm4, %%mm0   \n\t" /* Y U4 Y V4 Y U4 Y V4*/
+                "punpckhbw       %%mm4, %%mm3   \n\t" /* Y U5 Y V5 Y U5 Y V5*/
+                MOVNTQ"          %%mm0, 32(%4, %0, 8)   \n\t"
+                MOVNTQ"          %%mm3, 40(%4, %0, 8)   \n\t"
+
+                /* groups 6 and 7 */
+                "punpckhbw       %%mm5, %%mm6   \n\t"
+                "movq    24(%1, %0, 4), %%mm0   \n\t"
+                "movq            %%mm0, %%mm3   \n\t"
+                "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U6 Y V6 Y U6 Y V6*/
+                "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U7 Y V7 Y U7 Y V7*/
+                MOVNTQ"          %%mm0, 48(%4, %0, 8)   \n\t"
+                MOVNTQ"          %%mm3, 56(%4, %0, 8)   \n\t"
+
+                : "+r" (x)
+                : "r"(yp), "r" (up), "r"(vp), "r"(d)
+                :"memory");
+        }
+        /* Scalar tail for the last (w % 8) groups of the row. */
+        for (; x<w; x++) {
+            const long x2 = x<<2;
+            d[8*x+0] = yp[x2];
+            d[8*x+1] = up[x];
+            d[8*x+2] = yp[x2+1];
+            d[8*x+3] = vp[x];
+            d[8*x+4] = yp[x2+2];
+            d[8*x+5] = up[x];
+            d[8*x+6] = yp[x2+3];
+            d[8*x+7] = vp[x];
+        }
+    }
+    /* Leave MMX state and order the preceding stores. */
+    __asm__(
+            EMMS"       \n\t"
+            SFENCE"     \n\t"
+            ::: "memory"
+        );
+}
+
+/**
+ * Copy every second byte of src, starting with byte 0:
+ * dst[i] = src[2*i] for 0 <= i < count.
+ *
+ * Both pointers are moved to the end of their buffers and count is negated,
+ * so the loops index with a negative offset that counts up towards zero.
+ * The MMX loop handles 16 output bytes per iteration while at least 16
+ * remain; the scalar loop finishes the rest.
+ *
+ * The caller is expected to issue EMMS (and SFENCE) afterwards; this
+ * function does not.
+ *
+ * @param src   input, 2*count bytes are read
+ * @param dst   output, count bytes are written
+ * @param count number of output bytes
+ */
+static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
+{
+    dst +=   count;
+    src += 2*count;
+    count= - count;
+
+    if(count <= -16) {
+        /* Bias by 15 so that "add $16; js" keeps looping only while a full
+         * group of 16 is left; the displacements below undo the bias. */
+        count += 15;
+        __asm__ volatile(
+            "pcmpeqw       %%mm7, %%mm7        \n\t"
+            "psrlw            $8, %%mm7        \n\t" /* mm7 = 0x00FF in every word */
+            "1:                                \n\t"
+            "movq -30(%1, %0, 2), %%mm0        \n\t"
+            "movq -22(%1, %0, 2), %%mm1        \n\t"
+            "movq -14(%1, %0, 2), %%mm2        \n\t"
+            "movq  -6(%1, %0, 2), %%mm3        \n\t"
+            "pand          %%mm7, %%mm0        \n\t" /* keep the even byte of each pair */
+            "pand          %%mm7, %%mm1        \n\t"
+            "pand          %%mm7, %%mm2        \n\t"
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm1, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm2        \n\t"
+            MOVNTQ"        %%mm0,-15(%2, %0)   \n\t"
+            MOVNTQ"        %%mm2,- 7(%2, %0)   \n\t"
+            "add             $16, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src), "r"(dst)
+            /* The asm loads through %1 and stores through %2, which are not
+             * memory operands, so the compiler must be told. */
+            : "memory"
+        );
+        count -= 15;
+    }
+    /* Scalar tail (fewer than 16 bytes left). */
+    while(count<0) {
+        dst[count]= src[2*count];
+        count++;
+    }
+}
+
+/**
+ * Split bytes 0 and 2 of every 4-byte group of src into two planes:
+ * dst0[i] = src[4*i+0], dst1[i] = src[4*i+2] for 0 <= i < count.
+ *
+ * Pointers are moved to the end of their buffers and count is negated so the
+ * loops index with a negative offset counting up to zero. The MMX loop
+ * handles 8 groups per iteration while at least 8 remain; the scalar loop
+ * finishes the rest.
+ *
+ * The caller is expected to issue EMMS (and SFENCE) afterwards.
+ *
+ * @param src   input, 4*count bytes are read
+ * @param dst0  receives byte 0 of every group
+ * @param dst1  receives byte 2 of every group
+ * @param count number of 4-byte groups
+ */
+static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+{
+    dst0+=   count;
+    dst1+=   count;
+    src += 4*count;
+    count= - count;
+    if(count <= -8) {
+        /* Bias by 7 so that "add $8; js" loops only while 8 groups remain. */
+        count += 7;
+        __asm__ volatile(
+            "pcmpeqw       %%mm7, %%mm7        \n\t"
+            "psrlw            $8, %%mm7        \n\t" /* mm7 = 0x00FF in every word */
+            "1:                                \n\t"
+            "movq -28(%1, %0, 4), %%mm0        \n\t"
+            "movq -20(%1, %0, 4), %%mm1        \n\t"
+            "movq -12(%1, %0, 4), %%mm2        \n\t"
+            "movq  -4(%1, %0, 4), %%mm3        \n\t"
+            "pand          %%mm7, %%mm0        \n\t" /* keep bytes 0 and 2 of each group */
+            "pand          %%mm7, %%mm1        \n\t"
+            "pand          %%mm7, %%mm2        \n\t"
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm1, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm2        \n\t"
+            "movq          %%mm0, %%mm1        \n\t"
+            "movq          %%mm2, %%mm3        \n\t"
+            "psrlw            $8, %%mm0        \n\t" /* mm0/mm2: former byte 2 */
+            "psrlw            $8, %%mm2        \n\t"
+            "pand          %%mm7, %%mm1        \n\t" /* mm1/mm3: former byte 0 */
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm2, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm1        \n\t"
+            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
+            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
+            "add              $8, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src), "r"(dst0), "r"(dst1)
+            /* Memory is accessed through plain register operands. */
+            : "memory"
+        );
+        count -= 7;
+    }
+    /* Scalar tail (fewer than 8 groups left). */
+    while(count<0) {
+        dst0[count]= src[4*count+0];
+        dst1[count]= src[4*count+2];
+        count++;
+    }
+}
+
+/**
+ * Average two rows and split bytes 0 and 2 of every 4-byte group into two
+ * planes:
+ *   dst0[i] = avg(src0[4*i+0], src1[4*i+0])
+ *   dst1[i] = avg(src0[4*i+2], src1[4*i+2])   for 0 <= i < count.
+ *
+ * The SIMD loop is only compiled when PAVGB is defined; it handles 8 groups
+ * per iteration while at least 8 remain. The scalar loop finishes the rest
+ * (or does everything when PAVGB is not available).
+ *
+ * NOTE(review): the scalar average is (a+b)>>1 (rounds down); the byte
+ * average instruction behind PAVGB presumably rounds up, so the two paths can
+ * differ by one -- confirm whether that matters to callers.
+ *
+ * The caller is expected to issue EMMS (and SFENCE) afterwards.
+ *
+ * @param src0  first input row, 4*count bytes are read
+ * @param src1  second input row, 4*count bytes are read
+ * @param dst0  receives the averaged byte 0 of every group
+ * @param dst1  receives the averaged byte 2 of every group
+ * @param count number of 4-byte groups
+ */
+static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+{
+    /* Index from the end of each buffer with a negative, upward-counting offset. */
+    dst0 +=   count;
+    dst1 +=   count;
+    src0 += 4*count;
+    src1 += 4*count;
+    count= - count;
+#ifdef PAVGB
+    if(count <= -8) {
+        /* Bias by 7 so that "add $8; js" loops only while 8 groups remain. */
+        count += 7;
+        __asm__ volatile(
+            "pcmpeqw        %%mm7, %%mm7        \n\t"
+            "psrlw             $8, %%mm7        \n\t" /* mm7 = 0x00FF in every word */
+            "1:                                \n\t"
+            "movq  -28(%1, %0, 4), %%mm0        \n\t"
+            "movq  -20(%1, %0, 4), %%mm1        \n\t"
+            "movq  -12(%1, %0, 4), %%mm2        \n\t"
+            "movq   -4(%1, %0, 4), %%mm3        \n\t"
+            PAVGB" -28(%2, %0, 4), %%mm0        \n\t" /* average with the second row */
+            PAVGB" -20(%2, %0, 4), %%mm1        \n\t"
+            PAVGB" -12(%2, %0, 4), %%mm2        \n\t"
+            PAVGB" - 4(%2, %0, 4), %%mm3        \n\t"
+            "pand           %%mm7, %%mm0        \n\t" /* keep bytes 0 and 2 of each group */
+            "pand           %%mm7, %%mm1        \n\t"
+            "pand           %%mm7, %%mm2        \n\t"
+            "pand           %%mm7, %%mm3        \n\t"
+            "packuswb       %%mm1, %%mm0        \n\t"
+            "packuswb       %%mm3, %%mm2        \n\t"
+            "movq           %%mm0, %%mm1        \n\t"
+            "movq           %%mm2, %%mm3        \n\t"
+            "psrlw             $8, %%mm0        \n\t" /* mm0/mm2: former byte 2 */
+            "psrlw             $8, %%mm2        \n\t"
+            "pand           %%mm7, %%mm1        \n\t" /* mm1/mm3: former byte 0 */
+            "pand           %%mm7, %%mm3        \n\t"
+            "packuswb       %%mm2, %%mm0        \n\t"
+            "packuswb       %%mm3, %%mm1        \n\t"
+            MOVNTQ"         %%mm0,- 7(%4, %0)   \n\t"
+            MOVNTQ"         %%mm1,- 7(%3, %0)   \n\t"
+            "add               $8, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
+            /* Memory is accessed through plain register operands. */
+            : "memory"
+        );
+        count -= 7;
+    }
+#endif
+    /* Scalar tail / fallback. */
+    while(count<0) {
+        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
+        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
+        count++;
+    }
+}
+
+/**
+ * Split bytes 1 and 3 of every 4-byte group of src into two planes:
+ * dst0[i] = src[4*i+1], dst1[i] = src[4*i+3] for 0 <= i < count.
+ *
+ * Pointers are moved to the end of their buffers and count is negated so the
+ * loops index with a negative offset counting up to zero. The MMX loop
+ * handles 8 groups per iteration while at least 8 remain; the scalar loop
+ * finishes the rest.
+ *
+ * The caller is expected to issue EMMS (and SFENCE) afterwards.
+ *
+ * @param src   input, 4*count bytes are read
+ * @param dst0  receives byte 1 of every group
+ * @param dst1  receives byte 3 of every group
+ * @param count number of 4-byte groups
+ */
+static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+{
+    dst0+=   count;
+    dst1+=   count;
+    src += 4*count;
+    count= - count;
+    if(count <= -8) {
+        /* Bias by 7 so that "add $8; js" loops only while 8 groups remain. */
+        count += 7;
+        __asm__ volatile(
+            "pcmpeqw       %%mm7, %%mm7        \n\t"
+            "psrlw            $8, %%mm7        \n\t" /* mm7 = 0x00FF in every word */
+            "1:                                \n\t"
+            "movq -28(%1, %0, 4), %%mm0        \n\t"
+            "movq -20(%1, %0, 4), %%mm1        \n\t"
+            "movq -12(%1, %0, 4), %%mm2        \n\t"
+            "movq  -4(%1, %0, 4), %%mm3        \n\t"
+            "psrlw            $8, %%mm0        \n\t" /* keep bytes 1 and 3 of each group */
+            "psrlw            $8, %%mm1        \n\t"
+            "psrlw            $8, %%mm2        \n\t"
+            "psrlw            $8, %%mm3        \n\t"
+            "packuswb      %%mm1, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm2        \n\t"
+            "movq          %%mm0, %%mm1        \n\t"
+            "movq          %%mm2, %%mm3        \n\t"
+            "psrlw            $8, %%mm0        \n\t" /* mm0/mm2: former byte 3 */
+            "psrlw            $8, %%mm2        \n\t"
+            "pand          %%mm7, %%mm1        \n\t" /* mm1/mm3: former byte 1 */
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm2, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm1        \n\t"
+            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
+            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
+            "add              $8, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src), "r"(dst0), "r"(dst1)
+            /* Memory is accessed through plain register operands. */
+            : "memory"
+        );
+        count -= 7;
+    }
+    /* Shift by one byte so offsets +0/+2 below address bytes 1 and 3. */
+    src++;
+    while(count<0) {
+        dst0[count]= src[4*count+0];
+        dst1[count]= src[4*count+2];
+        count++;
+    }
+}
+
+/**
+ * Average two rows and split bytes 1 and 3 of every 4-byte group into two
+ * planes:
+ *   dst0[i] = avg(src0[4*i+1], src1[4*i+1])
+ *   dst1[i] = avg(src0[4*i+3], src1[4*i+3])   for 0 <= i < count.
+ *
+ * The SIMD loop is only compiled when PAVGB is defined; it handles 8 groups
+ * per iteration while at least 8 remain. The scalar loop finishes the rest
+ * (or does everything when PAVGB is not available).
+ *
+ * NOTE(review): the scalar average is (a+b)>>1 (rounds down); the byte
+ * average instruction behind PAVGB presumably rounds up, so the two paths can
+ * differ by one -- confirm whether that matters to callers.
+ *
+ * The caller is expected to issue EMMS (and SFENCE) afterwards.
+ *
+ * @param src0  first input row, 4*count bytes are read
+ * @param src1  second input row, 4*count bytes are read
+ * @param dst0  receives the averaged byte 1 of every group
+ * @param dst1  receives the averaged byte 3 of every group
+ * @param count number of 4-byte groups
+ */
+static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+{
+    /* Index from the end of each buffer with a negative, upward-counting offset. */
+    dst0 +=   count;
+    dst1 +=   count;
+    src0 += 4*count;
+    src1 += 4*count;
+    count= - count;
+#ifdef PAVGB
+    if(count <= -8) {
+        /* Bias by 7 so that "add $8; js" loops only while 8 groups remain. */
+        count += 7;
+        __asm__ volatile(
+            "pcmpeqw        %%mm7, %%mm7        \n\t"
+            "psrlw             $8, %%mm7        \n\t" /* mm7 = 0x00FF in every word */
+            "1:                                \n\t"
+            "movq  -28(%1, %0, 4), %%mm0        \n\t"
+            "movq  -20(%1, %0, 4), %%mm1        \n\t"
+            "movq  -12(%1, %0, 4), %%mm2        \n\t"
+            "movq   -4(%1, %0, 4), %%mm3        \n\t"
+            PAVGB" -28(%2, %0, 4), %%mm0        \n\t" /* average with the second row */
+            PAVGB" -20(%2, %0, 4), %%mm1        \n\t"
+            PAVGB" -12(%2, %0, 4), %%mm2        \n\t"
+            PAVGB" - 4(%2, %0, 4), %%mm3        \n\t"
+            "psrlw             $8, %%mm0        \n\t" /* keep bytes 1 and 3 of each group */
+            "psrlw             $8, %%mm1        \n\t"
+            "psrlw             $8, %%mm2        \n\t"
+            "psrlw             $8, %%mm3        \n\t"
+            "packuswb       %%mm1, %%mm0        \n\t"
+            "packuswb       %%mm3, %%mm2        \n\t"
+            "movq           %%mm0, %%mm1        \n\t"
+            "movq           %%mm2, %%mm3        \n\t"
+            "psrlw             $8, %%mm0        \n\t" /* mm0/mm2: former byte 3 */
+            "psrlw             $8, %%mm2        \n\t"
+            "pand           %%mm7, %%mm1        \n\t" /* mm1/mm3: former byte 1 */
+            "pand           %%mm7, %%mm3        \n\t"
+            "packuswb       %%mm2, %%mm0        \n\t"
+            "packuswb       %%mm3, %%mm1        \n\t"
+            MOVNTQ"         %%mm0,- 7(%4, %0)   \n\t"
+            MOVNTQ"         %%mm1,- 7(%3, %0)   \n\t"
+            "add               $8, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
+            /* Memory is accessed through plain register operands. */
+            : "memory"
+        );
+        count -= 7;
+    }
+#endif
+    /* Shift by one byte so offsets +0/+2 below address bytes 1 and 3. */
+    src0++;
+    src1++;
+    while(count<0) {
+        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
+        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
+        count++;
+    }
+}
+
+/**
+ * Split a packed source into three planes, producing one chroma row for
+ * every two source rows.
+ *
+ * Every row: the even bytes of the source row go to ydst.
+ * Every odd row: bytes 1 and 3 of each 4-byte group, averaged with the
+ * previous source row, go to udst and vdst, which then advance by one row.
+ *
+ * @param ydst        luma output plane
+ * @param udst        first chroma output plane (byte 1 of each group)
+ * @param vdst        second chroma output plane (byte 3 of each group)
+ * @param src         packed input
+ * @param width       number of luma bytes per row
+ * @param height      number of source rows
+ * @param lumStride   byte distance between rows of ydst
+ * @param chromStride byte distance between rows of udst / vdst
+ * @param srcStride   byte distance between rows of src
+ */
+static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                 long width, long height,
+                                 long lumStride, long chromStride, long srcStride)
+{
+    /* With an arithmetic right shift this is width/2 rounded up. */
+    const long chroma_w = -((-width)>>1);
+    const uint8_t *row  = src;
+    long line = 0;
+
+    while (line < height) {
+        RENAME(extract_even)(row, ydst, width);
+
+        if (line & 1) {
+            /* Second row of a pair: average it with the row above. */
+            RENAME(extract_odd2avg)(row - srcStride, row, udst, vdst, chroma_w);
+            udst += chromStride;
+            vdst += chromStride;
+        }
+
+        row  += srcStride;
+        ydst += lumStride;
+        line++;
+    }
+
+    /* Leave MMX state and order the preceding stores. */
+    __asm__(
+            EMMS"       \n\t"
+            SFENCE"     \n\t"
+            ::: "memory"
+        );
+}
+
+/**
+ * Split a packed source into three planes, one chroma row per source row.
+ *
+ * For every row the even bytes go to ydst, byte 1 of each 4-byte group goes
+ * to udst and byte 3 goes to vdst.
+ *
+ * @param ydst        luma output plane
+ * @param udst        first chroma output plane (byte 1 of each group)
+ * @param vdst        second chroma output plane (byte 3 of each group)
+ * @param src         packed input
+ * @param width       number of luma bytes per row
+ * @param height      number of rows
+ * @param lumStride   byte distance between rows of ydst
+ * @param chromStride byte distance between rows of udst / vdst
+ * @param srcStride   byte distance between rows of src
+ */
+static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                 long width, long height,
+                                 long lumStride, long chromStride, long srcStride)
+{
+    /* With an arithmetic right shift this is width/2 rounded up. */
+    const long chroma_w = -((-width)>>1);
+    long rows_left;
+
+    for (rows_left = height; rows_left > 0; rows_left--) {
+        RENAME(extract_even)(src, ydst, width);
+        RENAME(extract_odd2)(src, udst, vdst, chroma_w);
+
+        ydst += lumStride;
+        udst += chromStride;
+        vdst += chromStride;
+        src  += srcStride;
+    }
+
+    /* Leave MMX state and order the preceding stores. */
+    __asm__(
+            EMMS"       \n\t"
+            SFENCE"     \n\t"
+            ::: "memory"
+        );
+}
+
+/**
+ * Split a packed source into three planes, producing one chroma row for
+ * every two source rows.
+ *
+ * Every row: the odd bytes of the source row (even bytes of src+1) go to ydst.
+ * Every odd row: bytes 0 and 2 of each 4-byte group, averaged with the
+ * previous source row, go to udst and vdst, which then advance by one row.
+ *
+ * @param ydst        luma output plane
+ * @param udst        first chroma output plane (byte 0 of each group)
+ * @param vdst        second chroma output plane (byte 2 of each group)
+ * @param src         packed input
+ * @param width       number of luma bytes per row
+ * @param height      number of source rows
+ * @param lumStride   byte distance between rows of ydst
+ * @param chromStride byte distance between rows of udst / vdst
+ * @param srcStride   byte distance between rows of src
+ */
+static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                 long width, long height,
+                                 long lumStride, long chromStride, long srcStride)
+{
+    /* With an arithmetic right shift this is width/2 rounded up. */
+    const long chroma_w = -((-width)>>1);
+    const uint8_t *row  = src;
+    long line = 0;
+
+    while (line < height) {
+        RENAME(extract_even)(row + 1, ydst, width);
+
+        if (line & 1) {
+            /* Second row of a pair: average it with the row above. */
+            RENAME(extract_even2avg)(row - srcStride, row, udst, vdst, chroma_w);
+            udst += chromStride;
+            vdst += chromStride;
+        }
+
+        row  += srcStride;
+        ydst += lumStride;
+        line++;
+    }
+
+    /* Leave MMX state and order the preceding stores. */
+    __asm__(
+            EMMS"       \n\t"
+            SFENCE"     \n\t"
+            ::: "memory"
+        );
+}
+
+/**
+ * Split a packed source into three planes, one chroma row per source row.
+ *
+ * For every row the odd bytes (even bytes of src+1) go to ydst, byte 0 of
+ * each 4-byte group goes to udst and byte 2 goes to vdst.
+ *
+ * @param ydst        luma output plane
+ * @param udst        first chroma output plane (byte 0 of each group)
+ * @param vdst        second chroma output plane (byte 2 of each group)
+ * @param src         packed input
+ * @param width       number of luma bytes per row
+ * @param height      number of rows
+ * @param lumStride   byte distance between rows of ydst
+ * @param chromStride byte distance between rows of udst / vdst
+ * @param srcStride   byte distance between rows of src
+ */
+static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                 long width, long height,
+                                 long lumStride, long chromStride, long srcStride)
+{
+    /* With an arithmetic right shift this is width/2 rounded up. */
+    const long chroma_w = -((-width)>>1);
+    long rows_left;
+
+    for (rows_left = height; rows_left > 0; rows_left--) {
+        RENAME(extract_even)(src + 1, ydst, width);
+        RENAME(extract_even2)(src, udst, vdst, chroma_w);
+
+        ydst += lumStride;
+        udst += chromStride;
+        vdst += chromStride;
+        src  += srcStride;
+    }
+
+    /* Leave MMX state and order the preceding stores. */
+    __asm__(
+            EMMS"       \n\t"
+            SFENCE"     \n\t"
+            ::: "memory"
+        );
+}
+
+/**
+ * Point the converter function pointers at this template's implementations.
+ *
+ * Each assignment stores RENAME(name) into the pointer called name; the
+ * assignments are independent of one another.
+ */
+static inline void RENAME(rgb2rgb_init)(void)
+{
+    /* 15/16 bpp sources */
+    rgb15to16          = RENAME(rgb15to16);
+    rgb15to32          = RENAME(rgb15to32);
+    rgb15tobgr24       = RENAME(rgb15tobgr24);
+    rgb16to15          = RENAME(rgb16to15);
+    rgb16to32          = RENAME(rgb16to32);
+    rgb16tobgr24       = RENAME(rgb16tobgr24);
+
+    /* 24 bpp sources */
+    rgb24to15          = RENAME(rgb24to15);
+    rgb24to16          = RENAME(rgb24to16);
+    rgb24tobgr15       = RENAME(rgb24tobgr15);
+    rgb24tobgr16       = RENAME(rgb24tobgr16);
+    rgb24tobgr24       = RENAME(rgb24tobgr24);
+    rgb24tobgr32       = RENAME(rgb24tobgr32);
+    rgb24toyv12        = RENAME(rgb24toyv12);
+
+    /* 32 bpp sources */
+    rgb32to15          = RENAME(rgb32to15);
+    rgb32to16          = RENAME(rgb32to16);
+    rgb32tobgr15       = RENAME(rgb32tobgr15);
+    rgb32tobgr16       = RENAME(rgb32tobgr16);
+    rgb32tobgr24       = RENAME(rgb32tobgr24);
+    shuffle_bytes_2103 = RENAME(shuffle_bytes_2103);
+
+    /* planar <-> packed YUV */
+    yv12toyuy2         = RENAME(yv12toyuy2);
+    yv12touyvy         = RENAME(yv12touyvy);
+    yuv422ptoyuy2      = RENAME(yuv422ptoyuy2);
+    yuv422ptouyvy      = RENAME(yuv422ptouyvy);
+    yuy2toyv12         = RENAME(yuy2toyv12);
+    yuyvtoyuv420       = RENAME(yuyvtoyuv420);
+    yuyvtoyuv422       = RENAME(yuyvtoyuv422);
+    uyvytoyuv420       = RENAME(uyvytoyuv420);
+    uyvytoyuv422       = RENAME(uyvytoyuv422);
+    yvu9_to_yuy2       = RENAME(yvu9_to_yuy2);
+
+    /* plane helpers */
+    planar2x           = RENAME(planar2x);
+    interleaveBytes    = RENAME(interleaveBytes);
+    vu9_to_vu12        = RENAME(vu9_to_vu12);
+}
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
new file mode 100644
index 0000000000..19fcbbf305
--- /dev/null
+++ b/libswscale/x86/swscale_template.c
@@ -0,0 +1,2825 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_template.h"
+
+#undef REAL_MOVNTQ /* drop any earlier definitions so the macros can be redefined below */
+#undef MOVNTQ
+#undef PAVGB
+#undef PREFETCH
+
+#if COMPILE_TEMPLATE_AMD3DNOW /* PREFETCH: prefetch mnemonic for the selected instruction set */
+#define PREFETCH  "prefetch"
+#elif COMPILE_TEMPLATE_MMX2
+#define PREFETCH "prefetchnta"
+#else
+#define PREFETCH  " # nop" /* no prefetch available; NOTE(review): '#' presumably starts an assembler comment here */
+#endif
+
+#if COMPILE_TEMPLATE_MMX2 /* PAVGB(a,b): packed unsigned byte average; left undefined when neither branch matches */
+#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
+#elif COMPILE_TEMPLATE_AMD3DNOW
+#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
+#endif
+
+#if COMPILE_TEMPLATE_MMX2 /* REAL_MOVNTQ(a,b): 8-byte store, non-temporal (movntq) only in the MMX2 build */
+#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
+#else
+#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
+#endif
+#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b) /* extra level: arguments are macro-expanded before REAL_MOVNTQ stringifies them */
+
+#define YSCALEYUV2YV12X(x, offset, dest, width) /* weighted sum over a zero-terminated list of (source pointer, coefficient) entries; 8 output bytes per pass */ \
+    __asm__ volatile(\
+        "xor                          %%"REG_a", %%"REG_a"  \n\t" /* REG_a = 0: output index */\
+        "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t" /* both accumulators start at the rounder */\
+        "movq                             %%mm3, %%mm4      \n\t"\
+        "lea                     " offset "(%0), %%"REG_d"  \n\t" /* REG_d -> first list entry */\
+        "mov                        (%%"REG_d"), %%"REG_S"  \n\t" /* REG_S = source pointer of that entry */\
+        ".p2align                             4             \n\t" /* FIXME Unroll? */\
+        "1:                                                 \n\t" /* target of both the tap loop (jnz) and the output loop (jb) */\
+        "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
+        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
+        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm5      \n\t" /* srcData */\
+        "add                                $16, %%"REG_d"  \n\t" /* entries are 16 bytes apart */\
+        "mov                        (%%"REG_d"), %%"REG_S"  \n\t" /* source pointer of the next entry */\
+        "test                         %%"REG_S", %%"REG_S"  \n\t" /* a zero pointer ends the list */\
+        "pmulhw                           %%mm0, %%mm2      \n\t" /* high 16 bits of sample * coefficient */\
+        "pmulhw                           %%mm0, %%mm5      \n\t"\
+        "paddw                            %%mm2, %%mm3      \n\t"\
+        "paddw                            %%mm5, %%mm4      \n\t"\
+        " jnz                                1b             \n\t" /* uses the flags of the test above; the MMX ops in between leave EFLAGS alone */\
+        "psraw                               $3, %%mm3      \n\t"\
+        "psraw                               $3, %%mm4      \n\t"\
+        "packuswb                         %%mm4, %%mm3      \n\t" /* 8 words -> 8 unsigned-saturated bytes */\
+        MOVNTQ(%%mm3, (%1, %%REGa)) /* store the 8 bytes at %1 + index */\
+        "add                                 $8, %%"REG_a"  \n\t"\
+        "cmp                                 %2, %%"REG_a"  \n\t" /* index vs. %2; the flags are consumed by jb below */\
+        "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t" /* re-arm accumulators and list pointer for the next pass */\
+        "movq                             %%mm3, %%mm4      \n\t"\
+        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
+        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
+        "jb                                  1b             \n\t"\
+        :: "r" (&c->redDither), /* %0: base address for the VROUNDER_OFFSET / offset displacements */\
+        "r" (dest), "g" ((x86_reg)width) /* %1: output pointer, %2: loop bound */\
+        : "%"REG_a, "%"REG_d, "%"REG_S /* NOTE(review): no "memory" clobber although the asm stores through %1 */\
+    );
+
+#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) /* weighted sum with 32-bit accumulation: two sources per list entry, combined with pmaddwd */ \
+    __asm__ volatile(\
+        "lea                     " offset "(%0), %%"REG_d"  \n\t" /* REG_d -> first list entry */\
+        "xor                          %%"REG_a", %%"REG_a"  \n\t" /* REG_a = 0: output index */\
+        "pxor                             %%mm4, %%mm4      \n\t" /* clear the four 32-bit accumulators mm4..mm7 */\
+        "pxor                             %%mm5, %%mm5      \n\t"\
+        "pxor                             %%mm6, %%mm6      \n\t"\
+        "pxor                             %%mm7, %%mm7      \n\t"\
+        "mov                        (%%"REG_d"), %%"REG_S"  \n\t" /* first source pointer of the entry */\
+        ".p2align                             4             \n\t"\
+        "1:                                                 \n\t" /* target of both the tap loop (jnz) and the output loop (jb) */\
+        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
+        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
+        "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t" /* second source pointer of the entry */\
+        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
+        "movq                             %%mm0, %%mm3      \n\t"\
+        "punpcklwd                        %%mm1, %%mm0      \n\t" /* interleave the samples of both sources */\
+        "punpckhwd                        %%mm1, %%mm3      \n\t"\
+        "movq       "STR(APCK_COEF)"(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
+        "pmaddwd                          %%mm1, %%mm0      \n\t" /* per dword: products of adjacent words, summed */\
+        "pmaddwd                          %%mm1, %%mm3      \n\t"\
+        "paddd                            %%mm0, %%mm4      \n\t"\
+        "paddd                            %%mm3, %%mm5      \n\t"\
+        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
+        "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t" /* first source pointer of the next entry */\
+        "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
+        "test                         %%"REG_S", %%"REG_S"  \n\t" /* a zero pointer ends the list; the flags are consumed by jnz below */\
+        "movq                             %%mm2, %%mm0      \n\t"\
+        "punpcklwd                        %%mm3, %%mm2      \n\t"\
+        "punpckhwd                        %%mm3, %%mm0      \n\t"\
+        "pmaddwd                          %%mm1, %%mm2      \n\t"\
+        "pmaddwd                          %%mm1, %%mm0      \n\t"\
+        "paddd                            %%mm2, %%mm6      \n\t"\
+        "paddd                            %%mm0, %%mm7      \n\t"\
+        " jnz                                1b             \n\t"\
+        "psrad                              $16, %%mm4      \n\t" /* scale the 32-bit sums down before packing to words */\
+        "psrad                              $16, %%mm5      \n\t"\
+        "psrad                              $16, %%mm6      \n\t"\
+        "psrad                              $16, %%mm7      \n\t"\
+        "movq             "VROUNDER_OFFSET"(%0), %%mm0      \n\t" /* the rounder is added after accumulation here */\
+        "packssdw                         %%mm5, %%mm4      \n\t"\
+        "packssdw                         %%mm7, %%mm6      \n\t"\
+        "paddw                            %%mm0, %%mm4      \n\t"\
+        "paddw                            %%mm0, %%mm6      \n\t"\
+        "psraw                               $3, %%mm4      \n\t"\
+        "psraw                               $3, %%mm6      \n\t"\
+        "packuswb                         %%mm6, %%mm4      \n\t" /* 8 words -> 8 unsigned-saturated bytes */\
+        MOVNTQ(%%mm4, (%1, %%REGa)) /* store the 8 bytes at %1 + index */\
+        "add                                 $8, %%"REG_a"  \n\t"\
+        "cmp                                 %2, %%"REG_a"  \n\t" /* index vs. %2; the flags are consumed by jb below */\
+        "lea                     " offset "(%0), %%"REG_d"  \n\t" /* re-arm list pointer and accumulators for the next pass */\
+        "pxor                             %%mm4, %%mm4      \n\t"\
+        "pxor                             %%mm5, %%mm5      \n\t"\
+        "pxor                             %%mm6, %%mm6      \n\t"\
+        "pxor                             %%mm7, %%mm7      \n\t"\
+        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
+        "jb                                  1b             \n\t"\
+        :: "r" (&c->redDither), /* %0: base address for the VROUNDER_OFFSET / offset displacements */\
+        "r" (dest), "g" ((x86_reg)width) /* %1: output pointer, %2: loop bound */\
+        : "%"REG_a, "%"REG_d, "%"REG_S /* NOTE(review): no "memory" clobber although the asm stores through %1 */\
+    );
+
+#define YSCALEYUV2YV121 /* asm text only (the user supplies the operands): 16-bit samples >>7, packed to bytes */ \
+    "mov %2, %%"REG_a"                    \n\t" /* index starts at operand %2 */\
+    ".p2align               4             \n\t" /* FIXME Unroll? */\
+    "1:                                   \n\t"\
+    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t" /* 2 x 4 words from %0 + 2*index */\
+    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
+    "psraw                 $7, %%mm0      \n\t"\
+    "psraw                 $7, %%mm1      \n\t"\
+    "packuswb           %%mm1, %%mm0      \n\t" /* 8 words -> 8 unsigned-saturated bytes */\
+    MOVNTQ(%%mm0, (%1, %%REGa)) /* store the 8 bytes at %1 + index */\
+    "add                   $8, %%"REG_a"  \n\t"\
+    "jnc                   1b             \n\t" /* loop until the add carries; NOTE(review): implies %2 is a negative offset - confirm at the call site */
+
+#define YSCALEYUV2YV121_ACCURATE /* asm text only: like YSCALEYUV2YV121 but adds 64 (saturating) before the >>7 */ \
+    "mov %2, %%"REG_a"                    \n\t" /* index starts at operand %2 */\
+    "pcmpeqw %%mm7, %%mm7                 \n\t" /* mm7 = all ones */\
+    "psrlw                 $15, %%mm7     \n\t" /* -> 1 in every word */\
+    "psllw                  $6, %%mm7     \n\t" /* -> 64 in every word: the rounding term */\
+    ".p2align                4            \n\t" /* FIXME Unroll? */\
+    "1:                                   \n\t"\
+    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t" /* 2 x 4 words from %0 + 2*index */\
+    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
+    "paddsw             %%mm7, %%mm0      \n\t" /* signed-saturating add of the rounding term */\
+    "paddsw             %%mm7, %%mm1      \n\t"\
+    "psraw                 $7, %%mm0      \n\t"\
+    "psraw                 $7, %%mm1      \n\t"\
+    "packuswb           %%mm1, %%mm0      \n\t" /* 8 words -> 8 unsigned-saturated bytes */\
+    MOVNTQ(%%mm0, (%1, %%REGa)) /* store the 8 bytes at %1 + index */\
+    "add                   $8, %%"REG_a"  \n\t"\
+    "jnc                   1b             \n\t" /* loop until the add carries; NOTE(review): implies %2 is a negative offset - confirm at the call site */
+
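+/* NOTE(review): presumably the operand list of an earlier implementation; it matches none of the asm operands visible in this part of the file.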
+    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
+       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
+       "r" (dest), "m" (dstW_reg),
+       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
+    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
+*/
+#define YSCALEYUV2PACKEDX_UV /* opens an asm statement (not closed in this macro); chroma taps -> mm3 = U sum, mm4 = V sum */ \
+    __asm__ volatile(\
+        "xor                   %%"REG_a", %%"REG_a"     \n\t" /* REG_a = 0: index */\
+        ".p2align                      4                \n\t"\
+        "nop                                            \n\t"\
+        "1:                                             \n\t" /* outer loop head; the jump back to it is not part of this macro */\
+        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t" /* REG_d -> first chroma list entry */\
+        "mov                 (%%"REG_d"), %%"REG_S"     \n\t" /* REG_S = source pointer of that entry */\
+        "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t" /* both accumulators start at the rounder */\
+        "movq                      %%mm3, %%mm4         \n\t"\
+        ".p2align                      4                \n\t"\
+        "2:                                             \n\t" /* tap loop */\
+        "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
+        "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
+        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
+        "add                         $16, %%"REG_d"     \n\t" /* entries are 16 bytes apart */\
+        "mov                 (%%"REG_d"), %%"REG_S"     \n\t" /* source pointer of the next entry */\
+        "pmulhw                    %%mm0, %%mm2         \n\t" /* high 16 bits of sample * coefficient */\
+        "pmulhw                    %%mm0, %%mm5         \n\t"\
+        "paddw                     %%mm2, %%mm3         \n\t"\
+        "paddw                     %%mm5, %%mm4         \n\t"\
+        "test                  %%"REG_S", %%"REG_S"     \n\t" /* a zero pointer ends the list */\
+        " jnz                         2b                \n\t"\
+
+#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) /* asm text only: tap loop over the list at offset(%0); sums land in dst1/dst2 */ \
+    "lea                "offset"(%0), %%"REG_d"     \n\t" /* REG_d -> first list entry */\
+    "mov                 (%%"REG_d"), %%"REG_S"     \n\t" /* REG_S = source pointer of that entry */\
+    "movq      "VROUNDER_OFFSET"(%0), "#dst1"       \n\t" /* both accumulators start at the rounder */\
+    "movq                    "#dst1", "#dst2"       \n\t"\
+    ".p2align                      4                \n\t"\
+    "2:                                             \n\t" /* tap loop */\
+    "movq               8(%%"REG_d"), "#coeff"      \n\t" /* filterCoeff */\
+    "movq  (%%"REG_S", %%"REG_a", 2), "#src1"       \n\t" /* Y1srcData */\
+    "movq 8(%%"REG_S", %%"REG_a", 2), "#src2"       \n\t" /* Y2srcData */\
+    "add                         $16, %%"REG_d"            \n\t" /* entries are 16 bytes apart */\
+    "mov                 (%%"REG_d"), %%"REG_S"     \n\t" /* source pointer of the next entry */\
+    "pmulhw                 "#coeff", "#src1"       \n\t" /* high 16 bits of sample * coefficient */\
+    "pmulhw                 "#coeff", "#src2"       \n\t"\
+    "paddw                   "#src1", "#dst1"       \n\t"\
+    "paddw                   "#src2", "#dst2"       \n\t"\
+    "test                  %%"REG_S", %%"REG_S"     \n\t" /* a zero pointer ends the list */\
+    " jnz                         2b                \n\t"\
+
+#define YSCALEYUV2PACKEDX /* chroma pass then luma pass; the asm statement stays open for the caller's code and YSCALEYUV2PACKEDX_END */ \
+    YSCALEYUV2PACKEDX_UV /* leaves the U sum in mm3 and the V sum in mm4 */ \
+    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) /* luma sums in mm1 and mm7; uses mm0/mm2/mm5 as scratch */ \
+
+#define YSCALEYUV2PACKEDX_END                     /* operand and clobber lists plus the closing ");" of an asm statement opened elsewhere */\
+        :: "r" (&c->redDither),                   /* %0: base address for the *_OFFSET displacements */\
+            "m" (dummy), "m" (dummy), "m" (dummy),/* %1..%3: placeholders, so dest and dstW_reg become %4 and %5 */\
+            "r" (dest), "m" (dstW_reg)            /* c, dummy, dest and dstW_reg must be in scope where this expands */\
+        : "%"REG_a, "%"REG_d, "%"REG_S            /* NOTE(review): no "memory" clobber is declared */\
+    );
+
+#define YSCALEYUV2PACKEDX_ACCURATE_UV /* opens an asm statement (not closed in this macro); chroma taps with 32-bit sums, results parked in U_TEMP/V_TEMP */ \
+    __asm__ volatile(\
+        "xor %%"REG_a", %%"REG_a"                       \n\t" /* REG_a = 0: index */\
+        ".p2align                      4                \n\t"\
+        "nop                                            \n\t"\
+        "1:                                             \n\t" /* outer loop head; the jump back to it is not part of this macro */\
+        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t" /* REG_d -> first chroma list entry */\
+        "mov                 (%%"REG_d"), %%"REG_S"     \n\t" /* first source pointer of the entry */\
+        "pxor                      %%mm4, %%mm4         \n\t" /* clear the four 32-bit accumulators mm4..mm7 */\
+        "pxor                      %%mm5, %%mm5         \n\t"\
+        "pxor                      %%mm6, %%mm6         \n\t"\
+        "pxor                      %%mm7, %%mm7         \n\t"\
+        ".p2align                      4                \n\t"\
+        "2:                                             \n\t" /* tap loop: two sources per entry */\
+        "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
+        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
+        "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t" /* second source pointer of the entry */\
+        "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
+        "movq                      %%mm0, %%mm3         \n\t"\
+        "punpcklwd                 %%mm1, %%mm0         \n\t" /* interleave the U samples of both sources */\
+        "punpckhwd                 %%mm1, %%mm3         \n\t"\
+        "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\
+        "pmaddwd                   %%mm1, %%mm0         \n\t" /* per dword: products of adjacent words, summed */\
+        "pmaddwd                   %%mm1, %%mm3         \n\t"\
+        "paddd                     %%mm0, %%mm4         \n\t"\
+        "paddd                     %%mm3, %%mm5         \n\t"\
+        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
+        "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t" /* first source pointer of the next entry */\
+        "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
+        "test                  %%"REG_S", %%"REG_S"     \n\t" /* a zero pointer ends the list; the flags are consumed by jnz below */\
+        "movq                      %%mm2, %%mm0         \n\t"\
+        "punpcklwd                 %%mm3, %%mm2         \n\t" /* interleave the V samples of both sources */\
+        "punpckhwd                 %%mm3, %%mm0         \n\t"\
+        "pmaddwd                   %%mm1, %%mm2         \n\t"\
+        "pmaddwd                   %%mm1, %%mm0         \n\t"\
+        "paddd                     %%mm2, %%mm6         \n\t"\
+        "paddd                     %%mm0, %%mm7         \n\t"\
+        " jnz                         2b                \n\t"\
+        "psrad                       $16, %%mm4         \n\t" /* scale the 32-bit sums down before packing to words */\
+        "psrad                       $16, %%mm5         \n\t"\
+        "psrad                       $16, %%mm6         \n\t"\
+        "psrad                       $16, %%mm7         \n\t"\
+        "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t" /* the rounder is added after accumulation here */\
+        "packssdw                  %%mm5, %%mm4         \n\t"\
+        "packssdw                  %%mm7, %%mm6         \n\t"\
+        "paddw                     %%mm0, %%mm4         \n\t"\
+        "paddw                     %%mm0, %%mm6         \n\t"\
+        "movq                      %%mm4, "U_TEMP"(%0)  \n\t" /* park U: writes into the block addressed by %0 */\
+        "movq                      %%mm6, "V_TEMP"(%0)  \n\t" /* park V likewise */\
+
+#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) /* asm text only: 32-bit tap sums over the list at offset(%0); ends with Y in mm1/mm7 and U/V reloaded into mm3/mm4 */ \
+    "lea                "offset"(%0), %%"REG_d"     \n\t" /* REG_d -> first list entry */\
+    "mov                 (%%"REG_d"), %%"REG_S"     \n\t" /* first source pointer of the entry */\
+    "pxor                      %%mm1, %%mm1         \n\t" /* clear the four 32-bit accumulators mm1, mm5, mm7, mm6 */\
+    "pxor                      %%mm5, %%mm5         \n\t"\
+    "pxor                      %%mm7, %%mm7         \n\t"\
+    "pxor                      %%mm6, %%mm6         \n\t"\
+    ".p2align                      4                \n\t"\
+    "2:                                             \n\t" /* tap loop: two sources per entry */\
+    "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
+    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
+    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t" /* second source pointer of the entry */\
+    "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
+    "movq                      %%mm0, %%mm3         \n\t"\
+    "punpcklwd                 %%mm4, %%mm0         \n\t" /* interleave the samples of both sources */\
+    "punpckhwd                 %%mm4, %%mm3         \n\t"\
+    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
+    "pmaddwd                   %%mm4, %%mm0         \n\t" /* per dword: products of adjacent words, summed */\
+    "pmaddwd                   %%mm4, %%mm3         \n\t"\
+    "paddd                     %%mm0, %%mm1         \n\t"\
+    "paddd                     %%mm3, %%mm5         \n\t"\
+    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
+    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t" /* first source pointer of the next entry */\
+    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
+    "test                  %%"REG_S", %%"REG_S"     \n\t" /* a zero pointer ends the list; the flags are consumed by jnz below */\
+    "movq                      %%mm2, %%mm0         \n\t"\
+    "punpcklwd                 %%mm3, %%mm2         \n\t"\
+    "punpckhwd                 %%mm3, %%mm0         \n\t"\
+    "pmaddwd                   %%mm4, %%mm2         \n\t"\
+    "pmaddwd                   %%mm4, %%mm0         \n\t"\
+    "paddd                     %%mm2, %%mm7         \n\t"\
+    "paddd                     %%mm0, %%mm6         \n\t"\
+    " jnz                         2b                \n\t"\
+    "psrad                       $16, %%mm1         \n\t" /* scale the 32-bit sums down before packing to words */\
+    "psrad                       $16, %%mm5         \n\t"\
+    "psrad                       $16, %%mm7         \n\t"\
+    "psrad                       $16, %%mm6         \n\t"\
+    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t" /* the rounder is added after accumulation here */\
+    "packssdw                  %%mm5, %%mm1         \n\t"\
+    "packssdw                  %%mm6, %%mm7         \n\t"\
+    "paddw                     %%mm0, %%mm1         \n\t"\
+    "paddw                     %%mm0, %%mm7         \n\t"\
+    "movq               "U_TEMP"(%0), %%mm3         \n\t" /* reload the value stored at U_TEMP(%0) */\
+    "movq               "V_TEMP"(%0), %%mm4         \n\t" /* reload the value stored at V_TEMP(%0) */\
+
+#define YSCALEYUV2PACKEDX_ACCURATE /* chroma pass then luma pass; the asm statement opened by the first macro stays open */ \
+    YSCALEYUV2PACKEDX_ACCURATE_UV /* stores its results at U_TEMP(%0) / V_TEMP(%0) */ \
+    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET) /* luma in mm1/mm7, then reloads mm3/mm4 from U_TEMP/V_TEMP */
+
+#define YSCALEYUV2RGBX /* asm text only. in: mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2; out (packed bytes): mm2 = B, mm4 = G, mm5 = R */ \
+    "psubw  "U_OFFSET"(%0), %%mm3       \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"(%0), %%mm4       \n\t" /* (V-128)8*/\
+    "movq            %%mm3, %%mm2       \n\t" /* (U-128)8*/\
+    "movq            %%mm4, %%mm5       \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"(%0), %%mm3       \n\t"\
+    "pmulhw "VG_COEFF"(%0), %%mm4       \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+    "pmulhw "UB_COEFF"(%0), %%mm2       \n\t"\
+    "pmulhw "VR_COEFF"(%0), %%mm5       \n\t"\
+    "psubw  "Y_OFFSET"(%0), %%mm1       \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"(%0), %%mm7       \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"(%0), %%mm1       \n\t"\
+    "pmulhw  "Y_COEFF"(%0), %%mm7       \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw           %%mm3, %%mm4       \n\t" /* mm4 = ug + vg: the combined green term */\
+    "movq            %%mm2, %%mm0       \n\t" /* keep copies: the unpacks below overwrite their operand */\
+    "movq            %%mm5, %%mm6       \n\t"\
+    "movq            %%mm4, %%mm3       \n\t"\
+    "punpcklwd       %%mm2, %%mm2       \n\t" /* duplicate each low chroma word: one chroma value per two luma values */\
+    "punpcklwd       %%mm5, %%mm5       \n\t"\
+    "punpcklwd       %%mm4, %%mm4       \n\t"\
+    "paddw           %%mm1, %%mm2       \n\t" /* add Y1 to the low-half chroma terms */\
+    "paddw           %%mm1, %%mm5       \n\t"\
+    "paddw           %%mm1, %%mm4       \n\t"\
+    "punpckhwd       %%mm0, %%mm0       \n\t" /* same duplication for the high chroma words */\
+    "punpckhwd       %%mm6, %%mm6       \n\t"\
+    "punpckhwd       %%mm3, %%mm3       \n\t"\
+    "paddw           %%mm7, %%mm0       \n\t" /* add Y2 to the high-half chroma terms */\
+    "paddw           %%mm7, %%mm6       \n\t"\
+    "paddw           %%mm7, %%mm3       \n\t"\
+    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb        %%mm0, %%mm2       \n\t" /* mm2 = 8 B bytes (unsigned saturation) */\
+    "packuswb        %%mm6, %%mm5       \n\t" /* mm5 = 8 R bytes */\
+    "packuswb        %%mm3, %%mm4       \n\t" /* mm4 = 8 G bytes */\
+
+#define REAL_YSCALEYUV2PACKED(index, c) /* asm text only: blends two luma lines (%0,%1) and two chroma lines (%2,%3); out: mm1/mm7 = Y, mm3 = U, mm4 = V */ \
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t" /* load both blend coefficients ... */\
+    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
+    "psraw                $3, %%mm0                           \n\t" /* ... scale them down by 3 bits ... */\
+    "psraw                $3, %%mm1                           \n\t"\
+    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t" /* ... and write them back: the block addressed by c is modified in place */\
+    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
+    "xor            "#index", "#index"                        \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t" /* loop head; the jump back to it is not part of this macro */\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
+    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
+    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
+    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
+    "psraw                $7, %%mm3     \n\t" /* uvbuf1[eax] >>7*/\
+    "psraw                $7, %%mm4     \n\t" /* uvbuf1[eax+2048] >>7*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
+    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
+    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
+    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
+    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "psraw                $7, %%mm1     \n\t" /* buf1[eax] >>7*/\
+    "psraw                $7, %%mm7     \n\t" /* buf1[eax] >>7*/\
+    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+
+#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c) /* extra level: arguments are macro-expanded before REAL_* stringifies them */
+
+#define REAL_YSCALEYUV2RGB_UV(index, c) /* asm text only: blends two chroma lines (%2,%3), removes the U/V offsets and starts the green products */ \
+    "xor            "#index", "#index"  \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t" /* loop head; the jump back to it is not part of this macro */\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
+    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
+    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
+    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
+    "psraw                $4, %%mm3     \n\t" /* uvbuf1[eax] >>4*/\
+    "psraw                $4, %%mm4     \n\t" /* uvbuf1[eax+2048] >>4*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+
+#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) /* asm text only: blends the lines at b1 and b2; out: mm1 (first 4 words) and mm7 (next 4); uses mm0/mm6 as scratch */ \
+    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
+    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
+    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
+    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
+    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "psraw                $4, %%mm1     \n\t" /* buf1[eax] >>4*/\
+    "psraw                $4, %%mm7     \n\t" /* buf1[eax] >>4*/\
+    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+
+#define REAL_YSCALEYUV2RGB_COEFF(c) /* asm text only. in: mm2/mm5 = offset-free U/V, mm3/mm4 = ug/vg, mm1/mm7 = Y; out (packed bytes): mm2 = B, mm4 = G, mm5 = R */ \
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t" /* mm4 = ug + vg: the combined green term */\
+    "movq              %%mm2, %%mm0     \n\t" /* keep copies: the unpacks below overwrite their operand */\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t" /* duplicate each low chroma word: one chroma value per two luma values */\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t" /* add Y1 to the low-half chroma terms */\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t" /* same duplication for the high chroma words */\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t" /* add Y2 to the high-half chroma terms */\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb          %%mm0, %%mm2     \n\t" /* mm2 = 8 B bytes (unsigned saturation) */\
+    "packuswb          %%mm6, %%mm5     \n\t" /* mm5 = 8 R bytes */\
+    "packuswb          %%mm3, %%mm4     \n\t" /* mm4 = 8 G bytes */\
+
+#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) /* extra level: arguments are macro-expanded before REAL_* stringifies them */
+
+#define YSCALEYUV2RGB(index, c) /* asm text only: chroma blend, luma blend of %0/%1, then the colour products and byte packing */ \
+    REAL_YSCALEYUV2RGB_UV(index, c) /* opens loop label 1 */ \
+    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) /* luma lines are operands %0 and %1 */ \
+    REAL_YSCALEYUV2RGB_COEFF(c) /* leaves packed bytes: mm2 = B, mm4 = G, mm5 = R */
+
+#define REAL_YSCALEYUV2PACKED1(index, c) /* asm text only: single-line variant, no blending; c is unused; out: mm1/mm7 = Y, mm3 = U, mm4 = V */ \
+    "xor            "#index", "#index"  \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t" /* loop head; the jump back to it is not part of this macro */\
+    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "psraw                $7, %%mm3     \n\t" /* uvbuf0[eax] >>7 */ \
+    "psraw                $7, %%mm4     \n\t" /* uvbuf0[eax+2048] >>7 */ \
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
+    "psraw                $7, %%mm1     \n\t" /* buf0[eax] >>7 */ \
+    "psraw                $7, %%mm7     \n\t" /* buf0[eax] >>7 */ \
+
+#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c) /* extra level: arguments are macro-expanded before REAL_* stringifies them */
+
+#define REAL_YSCALEYUV2RGB1(index, c) /* asm text only: single-line variant (reads only %0 and %2, no blending); out (packed bytes): mm2 = B, mm4 = G, mm5 = R */ \
+    "xor            "#index", "#index"  \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t" /* loop head; the jump back to it is not part of this macro */\
+    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] >>4*/\
+    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] >>4*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
+    "psraw                $4, %%mm1     \n\t" /* buf0[eax] >>4*/\
+    "psraw                $4, %%mm7     \n\t" /* buf0[eax] >>4*/\
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t" /* mm4 = ug + vg: the combined green term */\
+    "movq              %%mm2, %%mm0     \n\t" /* keep copies: the unpacks below overwrite their operand */\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t" /* duplicate each low chroma word: one chroma value per two luma values */\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t" /* add Y1 to the low-half chroma terms */\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t" /* same duplication for the high chroma words */\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t" /* add Y2 to the high-half chroma terms */\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb          %%mm0, %%mm2     \n\t" /* mm2 = 8 B bytes (unsigned saturation) */\
+    "packuswb          %%mm6, %%mm5     \n\t" /* mm5 = 8 R bytes */\
+    "packuswb          %%mm3, %%mm4     \n\t" /* mm4 = 8 G bytes */\
+
+#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c) /* extra level: arguments are macro-expanded before REAL_* stringifies them */
+
+#define REAL_YSCALEYUV2PACKED1b(index, c) /* asm text only: chroma is the sum of two lines (%2,%3) shifted right by 8, luma comes from %0 alone; c is unused */ \
+    "xor "#index", "#index"             \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t" /* loop head; the jump back to it is not part of this macro */\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
+    "psrlw                $8, %%mm3     \n\t" /* logical shift: the sum is treated as unsigned */ \
+    "psrlw                $8, %%mm4     \n\t" /* sum >>8, i.e. the average of both lines >>7 */ \
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
+    "psraw                $7, %%mm1     \n\t" /* buf0[eax] >>7 */ \
+    "psraw                $7, %%mm7     \n\t" /* buf0[eax] >>7 */
+#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c) /* extra level: arguments are macro-expanded before REAL_* stringifies them */
+
+// do vertical chrominance interpolation: chroma is the sum of the uvbuf0 and uvbuf1 lines, scaled by the shift that follows
+#define REAL_YSCALEYUV2RGB1b(index, c) \
+    "xor            "#index", "#index"  \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t" /* target of the jb 1b that ends the WRITExxx macros */\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[index]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[index]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[index] at +VOF bytes*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[index] at +VOF bytes*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[index] + uvbuf1[index]*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* same sum for the samples at +VOF bytes*/\
+    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
+    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[index  ]*/\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[index+4]*/\
+    "psraw                $4, %%mm1     \n\t" /* buf0[index  ] >>4*/\
+    "psraw                $4, %%mm7     \n\t" /* buf0[index+4] >>4*/\
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t" /* ug + vg */\
+    "movq              %%mm2, %%mm0     \n\t"\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t" /* each of the low two chroma terms duplicated for two pixels */\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t"\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t" /* same for the high two chroma terms */\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t"\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 (1 = built from Y1/mm1, 2 = built from Y2/mm7) */\
+    "packuswb          %%mm0, %%mm2     \n\t" /* mm2 = B as 8 unsigned bytes */\
+    "packuswb          %%mm6, %%mm5     \n\t" /* mm5 = R as 8 unsigned bytes */\
+    "packuswb          %%mm3, %%mm4     \n\t" /* mm4 = G as 8 unsigned bytes */\
+
+#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
+
+#define REAL_YSCALEYUV2RGB1_ALPHA(index) /* loads 8 alpha samples through operand %1, shifts them right by 7 and packs them into mm7 (clobbers mm1) */ \
+    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
+    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
+    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
+    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
+    "packuswb          %%mm1, %%mm7     \n\t" /* 8 alpha bytes in mm7, saturated to 0..255 */
+#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
+
+#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) /* interleaves 8 bytes each of b,g,r,a into 8 32-bit pixels, stores them, then loops to 1: while index < dstw; q0,q2,q3,t are scratch */ \
+    "movq       "#b", "#q2"     \n\t" /* B */\
+    "movq       "#r", "#t"      \n\t" /* R */\
+    "punpcklbw  "#g", "#b"      \n\t" /* GBGBGBGB 0 */\
+    "punpcklbw  "#a", "#r"      \n\t" /* ARARARAR 0 */\
+    "punpckhbw  "#g", "#q2"     \n\t" /* GBGBGBGB 2 */\
+    "punpckhbw  "#a", "#t"      \n\t" /* ARARARAR 2 */\
+    "movq       "#b", "#q0"     \n\t" /* GBGBGBGB 0 */\
+    "movq      "#q2", "#q3"     \n\t" /* GBGBGBGB 2 */\
+    "punpcklwd  "#r", "#q0"     \n\t" /* ARGBARGB 0 */\
+    "punpckhwd  "#r", "#b"      \n\t" /* ARGBARGB 1 */\
+    "punpcklwd  "#t", "#q2"     \n\t" /* ARGBARGB 2 */\
+    "punpckhwd  "#t", "#q3"     \n\t" /* ARGBARGB 3 */\
+\
+    MOVNTQ(   q0,   (dst, index, 4))\
+    MOVNTQ(    b,  8(dst, index, 4))\
+    MOVNTQ(   q2, 16(dst, index, 4))\
+    MOVNTQ(   q3, 24(dst, index, 4))\
+\
+    "add      $8, "#index"      \n\t" /* 8 pixels (32 bytes) were written */\
+    "cmp "#dstw", "#index"      \n\t"\
+    " jb      1b                \n\t" /* unsigned compare: repeat while index < dstw */
+#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
+
+#define REAL_WRITERGB16(dst, dstw, index) /* in: mm2=B, mm4=G, mm5=R as 8 bytes each, mm7 = filler interleaved above G (callers clear it); packs 8 16-bit pixels and loops to 1: */ \
+    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
+    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
+    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
+    "psrlq           $3, %%mm2  \n\t" /* B >>3 (64-bit shift; relies on the mask above -- bF8 is defined elsewhere, presumably 0xF8 per byte) */\
+\
+    "movq         %%mm2, %%mm1  \n\t" /* copy of B for bytes 4-7 */\
+    "movq         %%mm4, %%mm3  \n\t" /* copy of G for bytes 0-3 */\
+\
+    "punpcklbw    %%mm7, %%mm3  \n\t" /* G bytes 0-3 widened to words */\
+    "punpcklbw    %%mm5, %%mm2  \n\t" /* words with R in the high byte and B>>3 in the low byte, bytes 0-3 */\
+    "punpckhbw    %%mm7, %%mm4  \n\t" /* G bytes 4-7 widened to words */\
+    "punpckhbw    %%mm5, %%mm1  \n\t" /* words with R in the high byte and B>>3 in the low byte, bytes 4-7 */\
+\
+    "psllq           $3, %%mm3  \n\t" /* G <<3: moves G between the B and R fields */\
+    "psllq           $3, %%mm4  \n\t"\
+\
+    "por          %%mm3, %%mm2  \n\t" /* finished pixels 0-3 */\
+    "por          %%mm4, %%mm1  \n\t" /* finished pixels 4-7 */\
+\
+    MOVNTQ(%%mm2,  (dst, index, 2))\
+    MOVNTQ(%%mm1, 8(dst, index, 2))\
+\
+    "add             $8, "#index"   \n\t" /* 8 pixels (16 bytes) were written */\
+    "cmp        "#dstw", "#index"   \n\t"\
+    " jb             1b             \n\t" /* unsigned compare: repeat while index < dstw */
+#define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
+
+#define REAL_WRITERGB15(dst, dstw, index) /* like REAL_WRITERGB16 but all three components use the bF8 mask, R is moved down one bit and G is shifted by 2 instead of 3 */ \
+    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
+    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
+    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
+    "psrlq           $3, %%mm2  \n\t" /* B >>3 */\
+    "psrlq           $1, %%mm5  \n\t" /* R >>1 */\
+\
+    "movq         %%mm2, %%mm1  \n\t" /* copy of B for bytes 4-7 */\
+    "movq         %%mm4, %%mm3  \n\t" /* copy of G for bytes 0-3 */\
+\
+    "punpcklbw    %%mm7, %%mm3  \n\t" /* G bytes 0-3 widened to words (mm7 supplies the high bytes; callers clear it) */\
+    "punpcklbw    %%mm5, %%mm2  \n\t" /* words with R>>1 in the high byte and B>>3 in the low byte, bytes 0-3 */\
+    "punpckhbw    %%mm7, %%mm4  \n\t" /* G bytes 4-7 widened to words */\
+    "punpckhbw    %%mm5, %%mm1  \n\t" /* words with R>>1 in the high byte and B>>3 in the low byte, bytes 4-7 */\
+\
+    "psllq           $2, %%mm3  \n\t" /* G <<2: moves G between the B and R fields */\
+    "psllq           $2, %%mm4  \n\t"\
+\
+    "por          %%mm3, %%mm2  \n\t" /* finished pixels 0-3 */\
+    "por          %%mm4, %%mm1  \n\t" /* finished pixels 4-7 */\
+\
+    MOVNTQ(%%mm2,  (dst, index, 2))\
+    MOVNTQ(%%mm1, 8(dst, index, 2))\
+\
+    "add             $8, "#index"   \n\t" /* 8 pixels (16 bytes) were written */\
+    "cmp        "#dstw", "#index"   \n\t"\
+    " jb             1b             \n\t" /* unsigned compare: repeat while index < dstw */
+#define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
+
+#define WRITEBGR24OLD(dst, dstw, index) /* mask-based 24 bpp writer: stores 8 pixels (24 bytes) at dst, advances dst by 24 and index by 8, loops to 1: while index < dstw */ \
+    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
+    "movq      %%mm2, %%mm1             \n\t" /* B */\
+    "movq      %%mm5, %%mm6             \n\t" /* R */\
+    "punpcklbw %%mm4, %%mm2             \n\t" /* GBGBGBGB 0 */\
+    "punpcklbw %%mm7, %%mm5             \n\t" /* 0R0R0R0R 0 */\
+    "punpckhbw %%mm4, %%mm1             \n\t" /* GBGBGBGB 2 */\
+    "punpckhbw %%mm7, %%mm6             \n\t" /* 0R0R0R0R 2 */\
+    "movq      %%mm2, %%mm0             \n\t" /* GBGBGBGB 0 */\
+    "movq      %%mm1, %%mm3             \n\t" /* GBGBGBGB 2 */\
+    "punpcklwd %%mm5, %%mm0             \n\t" /* 0RGB0RGB 0 */\
+    "punpckhwd %%mm5, %%mm2             \n\t" /* 0RGB0RGB 1 */\
+    "punpcklwd %%mm6, %%mm1             \n\t" /* 0RGB0RGB 2 */\
+    "punpckhwd %%mm6, %%mm3             \n\t" /* 0RGB0RGB 3 */\
+\
+    "movq      %%mm0, %%mm4             \n\t" /* 0RGB0RGB 0 */\
+    "psrlq        $8, %%mm0             \n\t" /* 00RGB0RG 0 */\
+    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 0 */\
+    "pand "MANGLE(bm11111000)", %%mm0   \n\t" /* 00RGB000 0.5 */\
+    "por       %%mm4, %%mm0             \n\t" /* 00RGBRGB 0 */\
+    "movq      %%mm2, %%mm4             \n\t" /* 0RGB0RGB 1 */\
+    "psllq       $48, %%mm2             \n\t" /* GB000000 1 */\
+    "por       %%mm2, %%mm0             \n\t" /* GBRGBRGB 0 */\
+\
+    "movq      %%mm4, %%mm2             \n\t" /* 0RGB0RGB 1 */\
+    "psrld       $16, %%mm4             \n\t" /* 000R000R 1 */\
+    "psrlq       $24, %%mm2             \n\t" /* 0000RGB0 1.5 */\
+    "por       %%mm4, %%mm2             \n\t" /* 000RRGBR 1 */\
+    "pand "MANGLE(bm00001111)", %%mm2   \n\t" /* 0000RGBR 1 */\
+    "movq      %%mm1, %%mm4             \n\t" /* 0RGB0RGB 2 */\
+    "psrlq        $8, %%mm1             \n\t" /* 00RGB0RG 2 */\
+    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 2 */\
+    "pand "MANGLE(bm11111000)", %%mm1   \n\t" /* 00RGB000 2.5 */\
+    "por       %%mm4, %%mm1             \n\t" /* 00RGBRGB 2 */\
+    "movq      %%mm1, %%mm4             \n\t" /* 00RGBRGB 2 */\
+    "psllq       $32, %%mm1             \n\t" /* BRGB0000 2 */\
+    "por       %%mm1, %%mm2             \n\t" /* BRGBRGBR 1 */\
+\
+    "psrlq       $32, %%mm4             \n\t" /* 000000RG 2.5 */\
+    "movq      %%mm3, %%mm5             \n\t" /* 0RGB0RGB 3 */\
+    "psrlq        $8, %%mm3             \n\t" /* 00RGB0RG 3 */\
+    "pand "MANGLE(bm00000111)", %%mm5   \n\t" /* 00000RGB 3 */\
+    "pand "MANGLE(bm11111000)", %%mm3   \n\t" /* 00RGB000 3.5 */\
+    "por       %%mm5, %%mm3             \n\t" /* 00RGBRGB 3 */\
+    "psllq       $16, %%mm3             \n\t" /* RGBRGB00 3 */\
+    "por       %%mm4, %%mm3             \n\t" /* RGBRGBRG 2.5 */\
+\
+    MOVNTQ(%%mm0,   (dst))\
+    MOVNTQ(%%mm2,  8(dst))\
+    MOVNTQ(%%mm3, 16(dst))\
+    "add         $24, "#dst"            \n\t" /* dst is a running byte pointer: 8 pixels * 3 bytes */\
+\
+    "add          $8, "#index"          \n\t"\
+    "cmp     "#dstw", "#index"          \n\t"\
+    " jb          1b                    \n\t" /* unsigned compare: repeat while index < dstw */
+
+#define WRITEBGR24MMX(dst, dstw, index) /* shift/unpack-based 24 bpp writer: stores 8 pixels (24 bytes) at dst, advances dst by 24 and index by 8, loops to 1: while index < dstw */ \
+    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
+    "movq      %%mm2, %%mm1     \n\t" /* B */\
+    "movq      %%mm5, %%mm6     \n\t" /* R */\
+    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
+    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
+    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
+    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
+    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
+    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
+    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
+    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
+    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
+    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
+\
+    "movq      %%mm0, %%mm4     \n\t" /* 0RGB0RGB 0 */\
+    "movq      %%mm2, %%mm6     \n\t" /* 0RGB0RGB 1 */\
+    "movq      %%mm1, %%mm5     \n\t" /* 0RGB0RGB 2 */\
+    "movq      %%mm3, %%mm7     \n\t" /* 0RGB0RGB 3 (mm4-mm7 no longer hold G, R or zero from here on) */\
+\
+    "psllq       $40, %%mm0     \n\t" /* RGB00000 0 */\
+    "psllq       $40, %%mm2     \n\t" /* RGB00000 1 */\
+    "psllq       $40, %%mm1     \n\t" /* RGB00000 2 */\
+    "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */\
+\
+    "punpckhdq %%mm4, %%mm0     \n\t" /* 0RGBRGB0 0 */\
+    "punpckhdq %%mm6, %%mm2     \n\t" /* 0RGBRGB0 1 */\
+    "punpckhdq %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */\
+    "punpckhdq %%mm7, %%mm3     \n\t" /* 0RGBRGB0 3 */\
+\
+    "psrlq        $8, %%mm0     \n\t" /* 00RGBRGB 0 */\
+    "movq      %%mm2, %%mm6     \n\t" /* 0RGBRGB0 1 */\
+    "psllq       $40, %%mm2     \n\t" /* GB000000 1 */\
+    "por       %%mm2, %%mm0     \n\t" /* GBRGBRGB 0 */\
+    MOVNTQ(%%mm0, (dst))\
+\
+    "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */\
+    "movq      %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */\
+    "psllq       $24, %%mm1     \n\t" /* BRGB0000 2 */\
+    "por       %%mm1, %%mm6     \n\t" /* BRGBRGBR 1 */\
+    MOVNTQ(%%mm6, 8(dst))\
+\
+    "psrlq       $40, %%mm5     \n\t" /* 000000RG 2 */\
+    "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */\
+    "por       %%mm3, %%mm5     \n\t" /* RGBRGBRG 2 */\
+    MOVNTQ(%%mm5, 16(dst))\
+\
+    "add         $24, "#dst"    \n\t" /* dst is a running byte pointer: 8 pixels * 3 bytes */\
+\
+    "add          $8, "#index"  \n\t"\
+    "cmp     "#dstw", "#index"  \n\t"\
+    " jb          1b            \n\t" /* unsigned compare: repeat while index < dstw */
+
+#define WRITEBGR24MMX2(dst, dstw, index) /* pshufw-based 24 bpp writer: stores 8 pixels (24 bytes) at dst, advances dst by 24 and index by 8, loops to 1: while index < dstw */ \
+    /* in: mm2=B, %%mm4=G, %%mm5=R; mm0 and mm7 are loaded with masks below, so mm7 need not be 0 here */\
+    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
+    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
+    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
+    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
+    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
+\
+    "pand   %%mm0, %%mm1        \n\t" /*    B2        B1       B0 */\
+    "pand   %%mm0, %%mm3        \n\t" /*    G2        G1       G0 */\
+    "pand   %%mm7, %%mm6        \n\t" /*       R1        R0       */\
+\
+    "psllq     $8, %%mm3        \n\t" /* G2        G1       G0    */\
+    "por    %%mm1, %%mm6        \n\t"\
+    "por    %%mm3, %%mm6        \n\t" /* first 8 output bytes */\
+    MOVNTQ(%%mm6, (dst))\
+\
+    "psrlq     $8, %%mm4        \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
+    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
+    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
+    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
+\
+    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
+    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
+    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
+\
+    "por    %%mm1, %%mm3        \n\t" /* B5    G4 B4     G3 B3    */\
+    "por    %%mm3, %%mm6        \n\t" /* second 8 output bytes */\
+    MOVNTQ(%%mm6, 8(dst))\
+\
+    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6  B7 B6 B7 B6 */\
+    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
+    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
+\
+    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
+    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
+    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
+\
+    "por    %%mm1, %%mm3        \n\t"\
+    "por    %%mm3, %%mm6        \n\t" /* third 8 output bytes */\
+    MOVNTQ(%%mm6, 16(dst))\
+\
+    "add      $24, "#dst"       \n\t" /* dst is a running byte pointer: 8 pixels * 3 bytes */\
+\
+    "add       $8, "#index"     \n\t"\
+    "cmp  "#dstw", "#index"     \n\t"\
+    " jb       1b               \n\t" /* unsigned compare: repeat while index < dstw */
+
+#if COMPILE_TEMPLATE_MMX2 /* the MMX2 template gets the pshufw-based writer */
+#undef WRITEBGR24 /* drop any earlier definition (presumably from a previous inclusion of this template -- TODO confirm) */
+#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
+#else /* !COMPILE_TEMPLATE_MMX2 */
+#undef WRITEBGR24
+#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
+#endif /* COMPILE_TEMPLATE_MMX2 */
+
+#define REAL_WRITEYUY2(dst, dstw, index) /* in (16-bit words): mm1 = luma 0-3, mm7 = luma 4-7, mm3 and mm4 = the two chroma components; writes 16 bytes as luma,mm3,luma,mm4,... and loops to 1: */ \
+    "packuswb  %%mm3, %%mm3     \n\t" /* first chroma component as bytes */\
+    "packuswb  %%mm4, %%mm4     \n\t" /* second chroma component as bytes */\
+    "packuswb  %%mm7, %%mm1     \n\t" /* 8 luma bytes: mm1 in the low half, mm7 in the high half */\
+    "punpcklbw %%mm4, %%mm3     \n\t" /* chroma bytes interleaved: mm3,mm4,mm3,mm4,... */\
+    "movq      %%mm1, %%mm7     \n\t"\
+    "punpcklbw %%mm3, %%mm1     \n\t" /* luma 0-3 interleaved with chroma */\
+    "punpckhbw %%mm3, %%mm7     \n\t" /* luma 4-7 interleaved with chroma */\
+\
+    MOVNTQ(%%mm1, (dst, index, 2))\
+    MOVNTQ(%%mm7, 8(dst, index, 2))\
+\
+    "add          $8, "#index"  \n\t" /* 8 pixels (16 bytes) were written */\
+    "cmp     "#dstw", "#index"  \n\t"\
+    " jb          1b            \n\t" /* unsigned compare: repeat while index < dstw */
+#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
+
+
+static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+{ // planar output: asm macros unless SWS_BITEXACT is set, in which case yuv2yuvXinC() does all the work
+    if(!(c->flags & SWS_BITEXACT)) {
+        if (c->flags & SWS_ACCURATE_RND) { // same plane sequence as the else branch, using the _ACCURATE macro variant
+            if (uDest) { // chroma planes are written only when uDest is non-NULL (vDest is not checked separately)
+                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
+                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) // first argument VOF instead of 0 for the V plane (macro body not visible here)
+            }
+            if (CONFIG_SWSCALE_ALPHA && aDest) { // alpha plane is optional
+                YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
+            }
+
+            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW) // luma is always written
+        } else {
+            if (uDest) { // chroma planes are written only when uDest is non-NULL
+                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
+                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+            }
+            if (CONFIG_SWSCALE_ALPHA && aDest) { // alpha plane is optional
+                YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
+            }
+
+            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW) // luma is always written
+        }
+        return; // asm path done; the C call below is only reached with SWS_BITEXACT
+    }
+    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
+                chrFilter, chrSrc, chrFilterSize,
+                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
+}
+
+static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
+{ // no asm here: forwards every argument except c (which is unused) to the C implementation
+    yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
+                 chrFilter, chrSrc, chrFilterSize,
+                 dest, uDest, dstW, chrDstW, dstFormat);
+}
+
+static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+{ // converts one line per plane from int16_t intermediates to bytes, without vertical filtering
+    int i;
+    if(!(c->flags & SWS_BITEXACT)) { // asm path; with SWS_BITEXACT the C loops further down are used instead
+        long p= 4; // four planes, visited from index 3 down to 0
+        const int16_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; // end of each input line, in the order A, Y, U, V
+        uint8_t *dst[4]= {aDest, dest, uDest, vDest};
+        x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW}; // sample count of each plane
+
+        if (c->flags & SWS_ACCURATE_RND) {
+            while(p--) {
+                if (dst[p]) { // planes with a NULL destination are skipped
+                    __asm__ volatile(
+                        YSCALEYUV2YV121_ACCURATE
+                        :: "r" (src[p]), "r" (dst[p] + counter[p]), // source and destination are both passed as end-of-line pointers
+                        "g" (-counter[p]) // negated length; presumably the macro counts up to zero (its body is not visible here)
+                        : "%"REG_a
+                    );
+                }
+            }
+        } else {
+            while(p--) {
+                if (dst[p]) { // planes with a NULL destination are skipped
+                    __asm__ volatile(
+                        YSCALEYUV2YV121
+                        :: "r" (src[p]), "r" (dst[p] + counter[p]), // same operand layout as the accurate variant
+                        "g" (-counter[p])
+                        : "%"REG_a
+                    );
+                }
+            }
+        }
+        return;
+    }
+    for (i=0; i<dstW; i++) { // luma, C path
+        int val= (lumSrc[i]+64)>>7; // add half an output step, then drop 7 bits
+
+        if (val&256) { // for int16_t input val lies in -256..256, so bit 8 is set exactly when val is outside 0..255
+            if (val<0) val=0;
+            else       val=255;
+        }
+
+        dest[i]= val;
+    }
+
+    if (uDest) // chroma, C path; vDest is written whenever uDest is non-NULL
+        for (i=0; i<chrDstW; i++) {
+            int u=(chrSrc[i       ]+64)>>7;
+            int v=(chrSrc[i + VOFW]+64)>>7; // V samples are stored VOFW elements after the U samples
+
+            if ((u|v)&256) { // at least one of u, v is outside 0..255
+                if (u<0)        u=0;
+                else if (u>255) u=255;
+                if (v<0)        v=0;
+                else if (v>255) v=255;
+            }
+
+            uDest[i]= u;
+            vDest[i]= v;
+        }
+
+    if (CONFIG_SWSCALE_ALPHA && aDest) // NOTE(review): the asm path above converts alpha whenever aDest is non-NULL, without testing CONFIG_SWSCALE_ALPHA
+        for (i=0; i<dstW; i++) {
+            int val= (alpSrc[i]+64)>>7;
+            aDest[i]= av_clip_uint8(val);
+        }
+}
+
+
+/**
+ * vertical scale YV12 to packed RGB32/BGR24/RGB555/RGB565/YUYV422; other formats and SWS_BITEXACT use yuv2packedXinC()
+ */
+static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0; // placeholder for the unused asm operands in the BGR24 cases
+    x86_reg dstW_reg = dstW; // register-sized copy of dstW used as an asm memory operand
+    if(!(c->flags & SWS_BITEXACT)) {
+        if (c->flags & SWS_ACCURATE_RND) { // same cases as the else branch, built on the _ACCURATE macros
+            switch(c->dstFormat) {
+            case PIX_FMT_RGB32:
+                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+                    YSCALEYUV2PACKEDX_ACCURATE
+                    YSCALEYUV2RGBX
+                    "movq                      %%mm2, "U_TEMP"(%0)  \n\t" // park mm2, mm4, mm5 in the context while alpha is computed
+                    "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
+                    "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
+                    YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
+                    "movq               "Y_TEMP"(%0), %%mm5         \n\t" // only mm5 is reloaded here; NOTE(review): B and G are taken from mm3/mm4 below, presumably reloaded inside the _YA macro
+                    "psraw                        $3, %%mm1         \n\t"
+                    "psraw                        $3, %%mm7         \n\t"
+                    "packuswb                  %%mm7, %%mm1         \n\t" // 8 alpha bytes in mm1
+                    WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
+
+                    YSCALEYUV2PACKEDX_END
+                } else {
+                    YSCALEYUV2PACKEDX_ACCURATE
+                    YSCALEYUV2RGBX
+                    "pcmpeqd %%mm7, %%mm7 \n\t" // all ones: alpha byte 0xFF for every pixel
+                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+
+                    YSCALEYUV2PACKEDX_END
+                }
+                return;
+            case PIX_FMT_BGR24:
+                YSCALEYUV2PACKEDX_ACCURATE
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize (REG_c = 3 * index)
+                "add %4, %%"REG_c"                        \n\t" // REG_c = dest + 3 * index, the byte pointer handed to WRITEBGR24
+                WRITEBGR24(%%REGc, %5, %%REGa)
+
+
+                :: "r" (&c->redDither), // operand list written out here instead of YSCALEYUV2PACKEDX_END; REG_c is in the clobber list
+                "m" (dummy), "m" (dummy), "m" (dummy),
+                "r" (dest), "m" (dstW_reg)
+                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+                );
+                return;
+            case PIX_FMT_RGB555:
+                YSCALEYUV2PACKEDX_ACCURATE
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
+                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
+                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
+#endif
+
+                WRITERGB15(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            case PIX_FMT_RGB565:
+                YSCALEYUV2PACKEDX_ACCURATE
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
+                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
+                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
+#endif
+
+                WRITERGB16(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            case PIX_FMT_YUYV422:
+                YSCALEYUV2PACKEDX_ACCURATE
+                /* no RGB conversion here: mm1,mm7=Y (two halves), mm3=U, mm4=V as consumed by WRITEYUY2 */
+
+                "psraw $3, %%mm3    \n\t"
+                "psraw $3, %%mm4    \n\t"
+                "psraw $3, %%mm1    \n\t"
+                "psraw $3, %%mm7    \n\t"
+                WRITEYUY2(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            } // no default: any other format falls through to yuv2packedXinC() below
+        } else {
+            switch(c->dstFormat) {
+            case PIX_FMT_RGB32:
+                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+                    YSCALEYUV2PACKEDX
+                    YSCALEYUV2RGBX
+                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
+                    "psraw                        $3, %%mm1         \n\t"
+                    "psraw                        $3, %%mm7         \n\t"
+                    "packuswb                  %%mm7, %%mm1         \n\t" // 8 alpha bytes in mm1
+                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+                    YSCALEYUV2PACKEDX_END
+                } else {
+                    YSCALEYUV2PACKEDX
+                    YSCALEYUV2RGBX
+                    "pcmpeqd %%mm7, %%mm7 \n\t" // all ones: alpha byte 0xFF for every pixel
+                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                    YSCALEYUV2PACKEDX_END
+                }
+                return;
+            case PIX_FMT_BGR24:
+                YSCALEYUV2PACKEDX
+                YSCALEYUV2RGBX
+                "pxor                    %%mm7, %%mm7       \n\t"
+                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize (REG_c = 3 * index)
+                "add                        %4, %%"REG_c"   \n\t" // REG_c = dest + 3 * index, the byte pointer handed to WRITEBGR24
+                WRITEBGR24(%%REGc, %5, %%REGa)
+
+                :: "r" (&c->redDither), // operand list written out here instead of YSCALEYUV2PACKEDX_END; REG_c is in the clobber list
+                "m" (dummy), "m" (dummy), "m" (dummy),
+                "r" (dest),  "m" (dstW_reg)
+                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+                );
+                return;
+            case PIX_FMT_RGB555:
+                YSCALEYUV2PACKEDX
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
+                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
+                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
+#endif
+
+                WRITERGB15(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            case PIX_FMT_RGB565:
+                YSCALEYUV2PACKEDX
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
+                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
+                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
+#endif
+
+                WRITERGB16(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            case PIX_FMT_YUYV422:
+                YSCALEYUV2PACKEDX
+                /* no RGB conversion here: mm1,mm7=Y (two halves), mm3=U, mm4=V as consumed by WRITEYUY2 */
+
+                "psraw $3, %%mm3    \n\t"
+                "psraw $3, %%mm4    \n\t"
+                "psraw $3, %%mm1    \n\t"
+                "psraw $3, %%mm7    \n\t"
+                WRITEYUY2(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            } // no default: any other format falls through to yuv2packedXinC() below
+        }
+    }
+    yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
+                   chrFilter, chrSrc, chrFilterSize,
+                   alpSrc, dest, dstW, dstY);
+}
+
+/**
+ * vertical bilinear scale YV12 to packed RGB32/BGR24/RGB555/RGB565/YUYV422; other formats and SWS_BITEXACT use the C macro at the end
+ */
+static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                          const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+    int  yalpha1=4095- yalpha; // not referenced by the asm below; presumably consumed by the C macro at the end -- TODO confirm
+    int uvalpha1=4095-uvalpha;
+    int i;
+
+    if(!(c->flags & SWS_BITEXACT)) {
+        switch(c->dstFormat) {
+        //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
+        case PIX_FMT_RGB32:
+            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+#if ARCH_X86_64
+                __asm__ volatile( // x86-64: r8 is the loop index and the alpha pointers get their own operands %6/%7
+                    YSCALEYUV2RGB(%%r8, %5)
+                    YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
+                    "psraw                  $3, %%mm1       \n\t" /* alpha, first 4 samples, >>3 */
+                    "psraw                  $3, %%mm7       \n\t" /* alpha, next 4 samples, >>3 */
+                    "packuswb            %%mm7, %%mm1       \n\t" /* 8 alpha bytes in mm1 */
+                    WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
+                    "a" (&c->redDither)
+                    ,"r" (abuf0), "r" (abuf1)
+                    : "%r8"
+                );
+#else
+                c->u_temp=(intptr_t)abuf0; // alpha pointers travel through the context and are loaded inside the asm
+                c->v_temp=(intptr_t)abuf1;
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the context */
+                    "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+                    "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
+                    YSCALEYUV2RGB(%%REGBP, %5)
+                    "push                   %0              \n\t" /* save buf0/buf1 registers ... */
+                    "push                   %1              \n\t"
+                    "mov          "U_TEMP"(%5), %0          \n\t" /* ... and reuse them for abuf0/abuf1 */
+                    "mov          "V_TEMP"(%5), %1          \n\t"
+                    YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
+                    "psraw                  $3, %%mm1       \n\t" /* alpha, first 4 samples, >>3 */
+                    "psraw                  $3, %%mm7       \n\t" /* alpha, next 4 samples, >>3 */
+                    "packuswb            %%mm7, %%mm1       \n\t" /* 8 alpha bytes in mm1 */
+                    "pop                    %1              \n\t" /* restore buf1, then buf0, for the next loop iteration */
+                    "pop                    %0              \n\t"
+                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+#endif
+            } else {
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the context */
+                    "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+                    "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
+                    YSCALEYUV2RGB(%%REGBP, %5)
+                    "pcmpeqd %%mm7, %%mm7                   \n\t" /* all ones: alpha byte 0xFF for every pixel */
+                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+            }
+            return;
+        case PIX_FMT_BGR24:
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the context */
+                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest; WRITEBGR24 advances it as a running pointer */
+                "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
+                YSCALEYUV2RGB(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t"
+                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+            );
+            return;
+        case PIX_FMT_RGB555:
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the context */
+                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+                "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
+                YSCALEYUV2RGB(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+            );
+            return;
+        case PIX_FMT_RGB565:
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the context */
+                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+                "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
+                YSCALEYUV2RGB(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+            );
+            return;
+        case PIX_FMT_YUYV422:
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b in the context */
+                "mov %4, %%"REG_b"                        \n\t" /* REG_b = dest */
+                "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index */
+                YSCALEYUV2PACKED(%%REGBP, %5)
+                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+            );
+            return;
+        default: break; // every other format is handled by the C macro below
+        }
+    }
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
+}
+
+/**
+ * YV12 to RGB without scaling or interpolating: a single luma buffer (buf0) is used. With SWS_FULL_CHR_H_INT the work is handed to c->yuv2packed2; otherwise, unless SWS_BITEXACT is set, an inline-asm path is taken for RGB32, BGR24, RGB555, RGB565 and YUYV422, and every remaining case falls through to the C macros at the end.
+ */
+static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                          const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+    const int yalpha1=0; // NOTE(review): yalpha1 and i are not referenced by name in this function; presumably consumed by the C fallback macros - confirm
+    int i;
+
+    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int yalpha= 4096; //FIXME ...
+
+    if (flags&SWS_FULL_CHR_H_INT) { // delegate to c->yuv2packed2, passing buf0 and abuf0 twice
+        c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
+        return;
+    }
+
+    if(!(flags & SWS_BITEXACT)) { // the asm paths are skipped when SWS_BITEXACT is set
+        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+            switch(dstFormat) {
+            case PIX_FMT_RGB32:
+                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { // alpha variant: operand 1 ("d") carries abuf0 instead of buf1
+                    __asm__ volatile(
+                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" // stash REG_b in memory at ESP_OFFSET from operand 5 (&c->redDither)
+                        "mov        %4, %%"REG_b"               \n\t" // REG_b = dest (operand 4)
+                        "push %%"REG_BP"                        \n\t" // REG_BP is handed to the macros below, so save it first
+                        YSCALEYUV2RGB1(%%REGBP, %5)
+                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
+                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                        "pop %%"REG_BP"                         \n\t" // restore REG_BP ...
+                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" // ... and REG_b; every asm block below repeats this save/restore pattern
+
+                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                        "a" (&c->redDither)
+                    );
+                } else {
+                    __asm__ volatile(
+                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                        "mov        %4, %%"REG_b"               \n\t"
+                        "push %%"REG_BP"                        \n\t"
+                        YSCALEYUV2RGB1(%%REGBP, %5)
+                        "pcmpeqd %%mm7, %%mm7                   \n\t"
+                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                        "pop %%"REG_BP"                         \n\t"
+                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                        "a" (&c->redDither)
+                    );
+                }
+                return;
+            case PIX_FMT_BGR24:
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_RGB555:
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_RGB565:
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_YUYV422:
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2PACKED1(%%REGBP, %5)
+                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+                return;
+            } // any other dstFormat leaves the switch and reaches the C code at the end
+        } else { // uvalpha >= 2048: same set of formats, using the "1b" variants of the conversion macros
+            switch(dstFormat) {
+            case PIX_FMT_RGB32:
+                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+                    __asm__ volatile(
+                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                        "mov        %4, %%"REG_b"               \n\t"
+                        "push %%"REG_BP"                        \n\t"
+                        YSCALEYUV2RGB1b(%%REGBP, %5)
+                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
+                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                        "pop %%"REG_BP"                         \n\t"
+                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                        "a" (&c->redDither)
+                    );
+                } else {
+                    __asm__ volatile(
+                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                        "mov        %4, %%"REG_b"               \n\t"
+                        "push %%"REG_BP"                        \n\t"
+                        YSCALEYUV2RGB1b(%%REGBP, %5)
+                        "pcmpeqd %%mm7, %%mm7                   \n\t"
+                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                        "pop %%"REG_BP"                         \n\t"
+                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                        "a" (&c->redDither)
+                    );
+                }
+                return;
+            case PIX_FMT_BGR24:
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1b(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_RGB555:
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1b(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_RGB565:
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1b(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_YUYV422:
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2PACKED1b(%%REGBP, %5)
+                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+                return;
+            } // any other dstFormat leaves the switch and reaches the C code below
+        }
+    }
+    if (uvalpha < 2048) { // C fallback, split on uvalpha the same way as the asm paths
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+    } else {
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+    }
+}
+
+//FIXME yuy2* can read up to 7 samples too many
+
+static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+{   // Keeps one byte of every 16-bit word of src (selected by the bm01010101 mask) and stores the packed result to dst, 8 bytes per pass; 'unused' is not read.
+    __asm__ volatile(
+        "movq "MANGLE(bm01010101)", %%mm2           \n\t" // mask is defined elsewhere; presumably keeps the low byte of each word - confirm
+        "mov                    %0, %%"REG_a"       \n\t" // REG_a = -width, counts up towards 0
+        "1:                                         \n\t"
+        "movq    (%1, %%"REG_a",2), %%mm0           \n\t" // 16 source bytes per pass; %1 is src pre-biased by 2*width
+        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
+        "pand                %%mm2, %%mm0           \n\t"
+        "pand                %%mm2, %%mm1           \n\t"
+        "packuswb            %%mm1, %%mm0           \n\t" // squeeze the 8 masked words into 8 bytes
+        "movq                %%mm0, (%2, %%"REG_a") \n\t" // %2 is dst+width, so the store lands at dst[width + REG_a]
+        "add                    $8, %%"REG_a"       \n\t"
+        " js                    1b                  \n\t" // loop while the counter is still negative
+        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
+        : "%"REG_a
+    );
+}
+
+static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{   // Takes the high byte of every 16-bit word of src1 and splits those bytes alternately between dstU and dstV, 4 bytes each per pass; src2 is only checked by the assert.
+    __asm__ volatile(
+        "movq "MANGLE(bm01010101)", %%mm4           \n\t" // mask is defined elsewhere; presumably keeps the low byte of each word - confirm
+        "mov                    %0, %%"REG_a"       \n\t" // REG_a = -width, counts up towards 0
+        "1:                                         \n\t"
+        "movq    (%1, %%"REG_a",4), %%mm0           \n\t" // 16 source bytes per pass; %1 is src1 pre-biased by 4*width
+        "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
+        "psrlw                  $8, %%mm0           \n\t" // keep the high byte of each word
+        "psrlw                  $8, %%mm1           \n\t"
+        "packuswb            %%mm1, %%mm0           \n\t" // 8 bytes taken from the high halves
+        "movq                %%mm0, %%mm1           \n\t"
+        "psrlw                  $8, %%mm0           \n\t" // mm0: high byte of each packed word
+        "pand                %%mm4, %%mm1           \n\t" // mm1: masked byte of each packed word
+        "packuswb            %%mm0, %%mm0           \n\t"
+        "packuswb            %%mm1, %%mm1           \n\t"
+        "movd                %%mm0, (%3, %%"REG_a") \n\t" // 4 bytes to dstV
+        "movd                %%mm1, (%2, %%"REG_a") \n\t" // 4 bytes to dstU
+        "add                    $4, %%"REG_a"       \n\t"
+        " js                    1b                  \n\t" // loop while the counter is still negative
+        : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
+        : "%"REG_a
+    );
+    assert(src1 == src2);
+}
+
+static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{   // Stores the high byte of every 16-bit word of src1 to dstU and of src2 to dstV, 8 bytes each per pass; 'unused' is not read.
+    __asm__ volatile(
+        "mov                    %0, %%"REG_a"       \n\t" // REG_a = -width, counts up towards 0
+        "1:                                         \n\t"
+        "movq    (%1, %%"REG_a",2), %%mm0           \n\t" // 16 bytes from src1 (%1 is pre-biased by 2*width)
+        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
+        "movq    (%2, %%"REG_a",2), %%mm2           \n\t" // 16 bytes from src2
+        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
+        "psrlw                  $8, %%mm0           \n\t" // keep the high byte of each word
+        "psrlw                  $8, %%mm1           \n\t"
+        "psrlw                  $8, %%mm2           \n\t"
+        "psrlw                  $8, %%mm3           \n\t"
+        "packuswb            %%mm1, %%mm0           \n\t"
+        "packuswb            %%mm3, %%mm2           \n\t"
+        "movq                %%mm0, (%3, %%"REG_a") \n\t" // to dstU
+        "movq                %%mm2, (%4, %%"REG_a") \n\t" // to dstV
+        "add                    $8, %%"REG_a"       \n\t"
+        " js                    1b                  \n\t" // loop while the counter is still negative
+        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
+        : "%"REG_a
+    );
+}
+
+/* This is almost identical to the previous, and exists only because
+ * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
+static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+{   // Stores the high byte of every 16-bit word of src to dst, 8 bytes per pass; 'unused' is not read.
+    __asm__ volatile(
+        "mov                  %0, %%"REG_a"         \n\t" // REG_a = -width, counts up towards 0
+        "1:                                         \n\t"
+        "movq  (%1, %%"REG_a",2), %%mm0             \n\t" // 16 source bytes per pass; %1 is src pre-biased by 2*width
+        "movq 8(%1, %%"REG_a",2), %%mm1             \n\t"
+        "psrlw                $8, %%mm0             \n\t" // keep the high byte of each word
+        "psrlw                $8, %%mm1             \n\t"
+        "packuswb          %%mm1, %%mm0             \n\t"
+        "movq              %%mm0, (%2, %%"REG_a")   \n\t" // %2 is dst+width
+        "add                  $8, %%"REG_a"         \n\t"
+        " js                  1b                    \n\t" // loop while the counter is still negative
+        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
+        : "%"REG_a
+    );
+}
+
+static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{   // Takes the bm01010101-masked byte of every 16-bit word of src1 and splits those bytes alternately between dstU and dstV, 4 bytes each per pass; src2 is only checked by the assert.
+    __asm__ volatile(
+        "movq "MANGLE(bm01010101)", %%mm4           \n\t" // mask is defined elsewhere; presumably keeps the low byte of each word - confirm
+        "mov                    %0, %%"REG_a"       \n\t" // REG_a = -width, counts up towards 0
+        "1:                                         \n\t"
+        "movq    (%1, %%"REG_a",4), %%mm0           \n\t" // 16 source bytes per pass; %1 is src1 pre-biased by 4*width
+        "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
+        "pand                %%mm4, %%mm0           \n\t"
+        "pand                %%mm4, %%mm1           \n\t"
+        "packuswb            %%mm1, %%mm0           \n\t" // 8 bytes taken from the masked halves
+        "movq                %%mm0, %%mm1           \n\t"
+        "psrlw                  $8, %%mm0           \n\t" // mm0: high byte of each packed word
+        "pand                %%mm4, %%mm1           \n\t" // mm1: masked byte of each packed word
+        "packuswb            %%mm0, %%mm0           \n\t"
+        "packuswb            %%mm1, %%mm1           \n\t"
+        "movd                %%mm0, (%3, %%"REG_a") \n\t" // 4 bytes to dstV
+        "movd                %%mm1, (%2, %%"REG_a") \n\t" // 4 bytes to dstU
+        "add                    $4, %%"REG_a"       \n\t"
+        " js                    1b                  \n\t" // loop while the counter is still negative
+        : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
+        : "%"REG_a
+    );
+    assert(src1 == src2);
+}
+
+static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{   // Stores the bm01010101-masked byte of every 16-bit word of src1 to dstU and of src2 to dstV, 8 bytes each per pass; 'unused' is not read.
+    __asm__ volatile(
+        "movq "MANGLE(bm01010101)", %%mm4           \n\t" // mask is defined elsewhere; presumably keeps the low byte of each word - confirm
+        "mov                    %0, %%"REG_a"       \n\t" // REG_a = -width, counts up towards 0
+        "1:                                         \n\t"
+        "movq    (%1, %%"REG_a",2), %%mm0           \n\t" // 16 bytes from src1 (%1 is pre-biased by 2*width)
+        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
+        "movq    (%2, %%"REG_a",2), %%mm2           \n\t" // 16 bytes from src2
+        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
+        "pand                %%mm4, %%mm0           \n\t"
+        "pand                %%mm4, %%mm1           \n\t"
+        "pand                %%mm4, %%mm2           \n\t"
+        "pand                %%mm4, %%mm3           \n\t"
+        "packuswb            %%mm1, %%mm0           \n\t"
+        "packuswb            %%mm3, %%mm2           \n\t"
+        "movq                %%mm0, (%3, %%"REG_a") \n\t" // to dstU
+        "movq                %%mm2, (%4, %%"REG_a") \n\t" // to dstV
+        "add                    $8, %%"REG_a"       \n\t"
+        " js                    1b                  \n\t" // loop while the counter is still negative
+        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
+        : "%"REG_a
+    );
+}
+
+static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
+                                    const uint8_t *src, long width)
+{   // De-interleaves src: the bm01010101-masked byte of every 16-bit word goes to dst1 and the high byte to dst2, 8 bytes each per pass.
+    __asm__ volatile(
+        "movq "MANGLE(bm01010101)", %%mm4           \n\t" // mask is defined elsewhere; presumably keeps the low byte of each word - confirm
+        "mov                    %0, %%"REG_a"       \n\t" // REG_a = -width, counts up towards 0
+        "1:                                         \n\t"
+        "movq    (%1, %%"REG_a",2), %%mm0           \n\t" // 16 source bytes per pass; %1 is src pre-biased by 2*width
+        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
+        "movq                %%mm0, %%mm2           \n\t" // second copy for the high-byte half
+        "movq                %%mm1, %%mm3           \n\t"
+        "pand                %%mm4, %%mm0           \n\t"
+        "pand                %%mm4, %%mm1           \n\t"
+        "psrlw                  $8, %%mm2           \n\t"
+        "psrlw                  $8, %%mm3           \n\t"
+        "packuswb            %%mm1, %%mm0           \n\t"
+        "packuswb            %%mm3, %%mm2           \n\t"
+        "movq                %%mm0, (%2, %%"REG_a") \n\t" // masked bytes to dst1
+        "movq                %%mm2, (%3, %%"REG_a") \n\t" // high bytes to dst2
+        "add                    $8, %%"REG_a"       \n\t"
+        " js                    1b                  \n\t" // loop while the counter is still negative
+        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width)
+        : "%"REG_a
+    );
+}
+
+static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
+                                    const uint8_t *src1, const uint8_t *src2,
+                                    long width, uint32_t *unused)
+{   // Forwards to nvXXtoUV with dstU as the first destination and dstV as the second; src2 and 'unused' are ignored.
+    RENAME(nvXXtoUV)(dstU, dstV, src1, width);
+}
+
+static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
+                                    const uint8_t *src1, const uint8_t *src2,
+                                    long width, uint32_t *unused)
+{   // Forwards to nvXXtoUV with the destinations swapped (dstV first, dstU second); src2 and 'unused' are ignored.
+    RENAME(nvXXtoUV)(dstV, dstU, src1, width);
+}
+
+static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
+{
+    // Shared BGR24/RGB24 -> Y worker: srcFormat only selects which coefficient pair is loaded into mm5/mm6; the loop below consumes 12 source bytes and stores 4 output bytes per pass.
+    if(srcFormat == PIX_FMT_BGR24) {
+        __asm__ volatile(
+            "movq  "MANGLE(ff_bgr24toY1Coeff)", %%mm5       \n\t"
+            "movq  "MANGLE(ff_bgr24toY2Coeff)", %%mm6       \n\t"
+            :
+        );
+    } else {
+        __asm__ volatile(
+            "movq  "MANGLE(ff_rgb24toY1Coeff)", %%mm5       \n\t"
+            "movq  "MANGLE(ff_rgb24toY2Coeff)", %%mm6       \n\t"
+            :
+        );
+    }
+    // NOTE(review): the asm statements above declare no outputs or clobbers, so the loop below relies on mm5/mm6 surviving untouched between separate asm statements.
+    __asm__ volatile(
+        "movq  "MANGLE(ff_bgr24toYOffset)", %%mm4   \n\t" // offset added to every sum before the shift
+        "mov                        %2, %%"REG_a"   \n\t" // REG_a = -width, counts up towards 0
+        "pxor                    %%mm7, %%mm7       \n\t" // mm7 = 0, used to widen bytes to words
+        "1:                                         \n\t"
+        PREFETCH"               64(%0)              \n\t"
+        "movd                     (%0), %%mm0       \n\t" // four overlapping 4-byte loads covering 12 source bytes
+        "movd                    2(%0), %%mm1       \n\t"
+        "movd                    6(%0), %%mm2       \n\t"
+        "movd                    8(%0), %%mm3       \n\t"
+        "add                       $12, %0          \n\t" // src is advanced inside the asm ("+r" operand)
+        "punpcklbw               %%mm7, %%mm0       \n\t"
+        "punpcklbw               %%mm7, %%mm1       \n\t"
+        "punpcklbw               %%mm7, %%mm2       \n\t"
+        "punpcklbw               %%mm7, %%mm3       \n\t"
+        "pmaddwd                 %%mm5, %%mm0       \n\t"
+        "pmaddwd                 %%mm6, %%mm1       \n\t"
+        "pmaddwd                 %%mm5, %%mm2       \n\t"
+        "pmaddwd                 %%mm6, %%mm3       \n\t"
+        "paddd                   %%mm1, %%mm0       \n\t"
+        "paddd                   %%mm3, %%mm2       \n\t"
+        "paddd                   %%mm4, %%mm0       \n\t"
+        "paddd                   %%mm4, %%mm2       \n\t"
+        "psrad                     $15, %%mm0       \n\t"
+        "psrad                     $15, %%mm2       \n\t"
+        "packssdw                %%mm2, %%mm0       \n\t"
+        "packuswb                %%mm0, %%mm0       \n\t"
+        "movd                %%mm0, (%1, %%"REG_a") \n\t" // 4 result bytes; %1 is dst+width
+        "add                        $4, %%"REG_a"   \n\t"
+        " js                        1b              \n\t" // loop while the counter is still negative
+    : "+r" (src)
+    : "r" (dst+width), "g" ((x86_reg)-width)
+    : "%"REG_a
+    );
+}
+
+static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
+{   // Shared BGR24/RGB24 -> U/V worker: operand 4 is the coefficient table ff_bgr24toUV[srcFormat == PIX_FMT_RGB24]; each pass consumes 12 source bytes and stores 4 bytes to dstU and 4 to dstV.
+    __asm__ volatile(
+        "movq                    24(%4), %%mm6       \n\t" // fourth 8-byte table entry is kept in a register
+        "mov                        %3, %%"REG_a"   \n\t" // REG_a = -width, counts up towards 0
+        "pxor                    %%mm7, %%mm7       \n\t" // mm7 = 0, used to widen bytes to words
+        "1:                                         \n\t"
+        PREFETCH"               64(%0)              \n\t"
+        "movd                     (%0), %%mm0       \n\t"
+        "movd                    2(%0), %%mm1       \n\t"
+        "punpcklbw               %%mm7, %%mm0       \n\t"
+        "punpcklbw               %%mm7, %%mm1       \n\t"
+        "movq                    %%mm0, %%mm2       \n\t" // copies feed the second output plane
+        "movq                    %%mm1, %%mm3       \n\t"
+        "pmaddwd                  (%4), %%mm0       \n\t"
+        "pmaddwd                 8(%4), %%mm1       \n\t"
+        "pmaddwd                16(%4), %%mm2       \n\t"
+        "pmaddwd                 %%mm6, %%mm3       \n\t"
+        "paddd                   %%mm1, %%mm0       \n\t"
+        "paddd                   %%mm3, %%mm2       \n\t"
+
+        "movd                    6(%0), %%mm1       \n\t" // same computation on the second half of the 12 bytes
+        "movd                    8(%0), %%mm3       \n\t"
+        "add                       $12, %0          \n\t" // src is advanced inside the asm ("+r" operand)
+        "punpcklbw               %%mm7, %%mm1       \n\t"
+        "punpcklbw               %%mm7, %%mm3       \n\t"
+        "movq                    %%mm1, %%mm4       \n\t"
+        "movq                    %%mm3, %%mm5       \n\t"
+        "pmaddwd                  (%4), %%mm1       \n\t"
+        "pmaddwd                 8(%4), %%mm3       \n\t"
+        "pmaddwd                16(%4), %%mm4       \n\t"
+        "pmaddwd                 %%mm6, %%mm5       \n\t"
+        "paddd                   %%mm3, %%mm1       \n\t"
+        "paddd                   %%mm5, %%mm4       \n\t"
+
+        "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3       \n\t" // offset added to every sum before the shift
+        "paddd                   %%mm3, %%mm0       \n\t"
+        "paddd                   %%mm3, %%mm2       \n\t"
+        "paddd                   %%mm3, %%mm1       \n\t"
+        "paddd                   %%mm3, %%mm4       \n\t"
+        "psrad                     $15, %%mm0       \n\t"
+        "psrad                     $15, %%mm2       \n\t"
+        "psrad                     $15, %%mm1       \n\t"
+        "psrad                     $15, %%mm4       \n\t"
+        "packssdw                %%mm1, %%mm0       \n\t"
+        "packssdw                %%mm4, %%mm2       \n\t"
+        "packuswb                %%mm0, %%mm0       \n\t"
+        "packuswb                %%mm2, %%mm2       \n\t"
+        "movd                %%mm0, (%1, %%"REG_a") \n\t" // 4 bytes to dstU
+        "movd                %%mm2, (%2, %%"REG_a") \n\t" // 4 bytes to dstV
+        "add                        $4, %%"REG_a"   \n\t"
+        " js                        1b              \n\t" // loop while the counter is still negative
+    : "+r" (src)
+    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
+    : "%"REG_a
+    );
+}
+
+static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+{   // Calls bgr24ToY_mmx with PIX_FMT_BGR24; the 'unused' argument is ignored.
+    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
+}
+
+static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{   // Calls bgr24ToUV_mmx on src1 with PIX_FMT_BGR24; src2 is only checked by the assert and 'unused' is ignored.
+    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
+    assert(src1 == src2);
+}
+
+static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{   /* Plain-C BGR24 -> U/V where each output sample is built from the channel sums of two neighbouring pixels (6 source bytes). */
+    int x;
+    for (x = 0; x < width; ++x) {
+        const uint8_t *pair = src1 + 6*x;      /* two packed B,G,R triplets */
+        const int blue  = pair[0] + pair[3];
+        const int green = pair[1] + pair[4];
+        const int red   = pair[2] + pair[5];
+        dstU[x] = (RU*red + GU*green + BU*blue + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);  /* extra +1 in the shift halves the two-pixel sum */
+        dstV[x] = (RV*red + GV*green + BV*blue + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+    }
+    assert(src1 == src2);
+}
+
+static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+{   // Calls bgr24ToY_mmx with PIX_FMT_RGB24, which makes it load the rgb24 coefficient pair; 'unused' is ignored.
+    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
+}
+
+static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{   // Calls bgr24ToUV_mmx on src1 with PIX_FMT_RGB24; src2 is only checked by the assert and 'unused' is ignored.
+    assert(src1==src2);
+    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
+}
+
+static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+{   /* Plain-C RGB24 -> U/V where each output sample is built from the channel sums of two neighbouring pixels (6 source bytes). */
+    int x;
+    assert(src1==src2);
+    for (x = 0; x < width; ++x) {
+        const uint8_t *pair = src1 + 6*x;      /* two packed R,G,B triplets */
+        const int red   = pair[0] + pair[3];
+        const int green = pair[1] + pair[4];
+        const int blue  = pair[2] + pair[5];
+        dstU[x] = (RU*red + GU*green + BU*blue + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);  /* extra +1 in the shift halves the two-pixel sum */
+        dstV[x] = (RV*red + GV*green + BV*blue + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+    }
+}
+
+
+// bilinear / bicubic scaling
+static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
+                                  const int16_t *filter, const int16_t *filterPos, long filterSize)
+{
+    assert(filterSize % 4 == 0 && filterSize>0);
+    if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
+        x86_reg counter= -2*dstW;
+        filter-= counter*2;
+        filterPos-= counter/2;
+        dst-= counter/2;
+        __asm__ volatile(
+#if defined(PIC)
+            "push            %%"REG_b"              \n\t"
+#endif
+            "pxor                %%mm7, %%mm7       \n\t"
+            "push           %%"REG_BP"              \n\t" // we use 7 regs here ...
+            "mov             %%"REG_a", %%"REG_BP"  \n\t"
+            ".p2align                4              \n\t"
+            "1:                                     \n\t"
+            "movzwl   (%2, %%"REG_BP"), %%eax       \n\t"
+            "movzwl  2(%2, %%"REG_BP"), %%ebx       \n\t"
+            "movq  (%1, %%"REG_BP", 4), %%mm1       \n\t"
+            "movq 8(%1, %%"REG_BP", 4), %%mm3       \n\t"
+            "movd      (%3, %%"REG_a"), %%mm0       \n\t"
+            "movd      (%3, %%"REG_b"), %%mm2       \n\t"
+            "punpcklbw           %%mm7, %%mm0       \n\t"
+            "punpcklbw           %%mm7, %%mm2       \n\t"
+            "pmaddwd             %%mm1, %%mm0       \n\t"
+            "pmaddwd             %%mm2, %%mm3       \n\t"
+            "movq                %%mm0, %%mm4       \n\t"
+            "punpckldq           %%mm3, %%mm0       \n\t"
+            "punpckhdq           %%mm3, %%mm4       \n\t"
+            "paddd               %%mm4, %%mm0       \n\t"
+            "psrad                  $7, %%mm0       \n\t"
+            "packssdw            %%mm0, %%mm0       \n\t"
+            "movd                %%mm0, (%4, %%"REG_BP")    \n\t"
+            "add                    $4, %%"REG_BP"  \n\t"
+            " jnc                   1b              \n\t"
+
+            "pop            %%"REG_BP"              \n\t"
+#if defined(PIC)
+            "pop             %%"REG_b"              \n\t"
+#endif
+            : "+a" (counter)
+            : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
+#if !defined(PIC)
+            : "%"REG_b
+#endif
+        );
+    } else if (filterSize==8) {
+        x86_reg counter= -2*dstW;
+        filter-= counter*4;
+        filterPos-= counter/2;
+        dst-= counter/2;
+        __asm__ volatile(
+#if defined(PIC)
+            "push             %%"REG_b"             \n\t"
+#endif
+            "pxor                 %%mm7, %%mm7      \n\t"
+            "push            %%"REG_BP"             \n\t" // we use 7 regs here ...
+            "mov              %%"REG_a", %%"REG_BP" \n\t"
+            ".p2align                 4             \n\t"
+            "1:                                     \n\t"
+            "movzwl    (%2, %%"REG_BP"), %%eax      \n\t"
+            "movzwl   2(%2, %%"REG_BP"), %%ebx      \n\t"
+            "movq   (%1, %%"REG_BP", 8), %%mm1      \n\t"
+            "movq 16(%1, %%"REG_BP", 8), %%mm3      \n\t"
+            "movd       (%3, %%"REG_a"), %%mm0      \n\t"
+            "movd       (%3, %%"REG_b"), %%mm2      \n\t"
+            "punpcklbw            %%mm7, %%mm0      \n\t"
+            "punpcklbw            %%mm7, %%mm2      \n\t"
+            "pmaddwd              %%mm1, %%mm0      \n\t"
+            "pmaddwd              %%mm2, %%mm3      \n\t"
+
+            "movq  8(%1, %%"REG_BP", 8), %%mm1      \n\t"
+            "movq 24(%1, %%"REG_BP", 8), %%mm5      \n\t"
+            "movd      4(%3, %%"REG_a"), %%mm4      \n\t"
+            "movd      4(%3, %%"REG_b"), %%mm2      \n\t"
+            "punpcklbw            %%mm7, %%mm4      \n\t"
+            "punpcklbw            %%mm7, %%mm2      \n\t"
+            "pmaddwd              %%mm1, %%mm4      \n\t"
+            "pmaddwd              %%mm2, %%mm5      \n\t"
+            "paddd                %%mm4, %%mm0      \n\t"
+            "paddd                %%mm5, %%mm3      \n\t"
+            "movq                 %%mm0, %%mm4      \n\t"
+            "punpckldq            %%mm3, %%mm0      \n\t"
+            "punpckhdq            %%mm3, %%mm4      \n\t"
+            "paddd                %%mm4, %%mm0      \n\t"
+            "psrad                   $7, %%mm0      \n\t"
+            "packssdw             %%mm0, %%mm0      \n\t"
+            "movd                 %%mm0, (%4, %%"REG_BP")   \n\t"
+            "add                     $4, %%"REG_BP" \n\t"
+            " jnc                    1b             \n\t"
+
+            "pop             %%"REG_BP"             \n\t"
+#if defined(PIC)
+            "pop              %%"REG_b"             \n\t"
+#endif
+            : "+a" (counter)
+            : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
+#if !defined(PIC)
+            : "%"REG_b
+#endif
+        );
+    } else {
+        const uint8_t *offset = src+filterSize;
+        x86_reg counter= -2*dstW;
+        //filter-= counter*filterSize/2;
+        filterPos-= counter/2;
+        dst-= counter/2;
+        __asm__ volatile(
+            "pxor                  %%mm7, %%mm7     \n\t"
+            ".p2align                  4            \n\t"
+            "1:                                     \n\t"
+            "mov                      %2, %%"REG_c" \n\t"
+            "movzwl      (%%"REG_c", %0), %%eax     \n\t"
+            "movzwl     2(%%"REG_c", %0), %%edx     \n\t"
+            "mov                      %5, %%"REG_c" \n\t"
+            "pxor                  %%mm4, %%mm4     \n\t"
+            "pxor                  %%mm5, %%mm5     \n\t"
+            "2:                                     \n\t"
+            "movq                   (%1), %%mm1     \n\t"
+            "movq               (%1, %6), %%mm3     \n\t"
+            "movd (%%"REG_c", %%"REG_a"), %%mm0     \n\t"
+            "movd (%%"REG_c", %%"REG_d"), %%mm2     \n\t"
+            "punpcklbw             %%mm7, %%mm0     \n\t"
+            "punpcklbw             %%mm7, %%mm2     \n\t"
+            "pmaddwd               %%mm1, %%mm0     \n\t"
+            "pmaddwd               %%mm2, %%mm3     \n\t"
+            "paddd                 %%mm3, %%mm5     \n\t"
+            "paddd                 %%mm0, %%mm4     \n\t"
+            "add                      $8, %1        \n\t"
+            "add                      $4, %%"REG_c" \n\t"
+            "cmp                      %4, %%"REG_c" \n\t"
+            " jb                      2b            \n\t"
+            "add                      %6, %1        \n\t"
+            "movq                  %%mm4, %%mm0     \n\t"
+            "punpckldq             %%mm5, %%mm4     \n\t"
+            "punpckhdq             %%mm5, %%mm0     \n\t"
+            "paddd                 %%mm0, %%mm4     \n\t"
+            "psrad                    $7, %%mm4     \n\t"
+            "packssdw              %%mm4, %%mm4     \n\t"
+            "mov                      %3, %%"REG_a" \n\t"
+            "movd                  %%mm4, (%%"REG_a", %0)   \n\t"
+            "add                      $4, %0        \n\t"
+            " jnc                     1b            \n\t"
+
+            : "+r" (counter), "+r" (filter)
+            : "m" (filterPos), "m" (dst), "m"(offset),
+            "m" (src), "r" ((x86_reg)filterSize*2)
+            : "%"REG_a, "%"REG_c, "%"REG_d
+        );
+    }
+}
+
+//FIXME all pal and rgb srcFormats could do this conversion as well
+//FIXME all scalers more complex than bilinear could do half of this transform
+static void RENAME(chrRangeToJpeg)(int16_t *dst, int width)
+{
+    /* In-place rescale of both chroma planes (second plane VOFW entries after the first): min(x,30775)*4663 - 9289992, >>12. */
+    for (; width > 0; width--, dst++) {
+        dst[0   ] = (FFMIN(dst[0   ],30775)*4663 - 9289992)>>12; //-264
+        dst[VOFW] = (FFMIN(dst[VOFW],30775)*4663 - 9289992)>>12; //-264
+    }
+}
+static void RENAME(chrRangeFromJpeg)(int16_t *dst, int width)
+{
+    /* In-place rescale of both chroma planes (second plane VOFW entries after the first): x*1799 + 4081085, >>11. */
+    for (; width > 0; width--, dst++) {
+        dst[0   ] = (dst[0   ]*1799 + 4081085)>>11; //1469
+        dst[VOFW] = (dst[VOFW]*1799 + 4081085)>>11; //1469
+    }
+}
+static void RENAME(lumRangeToJpeg)(int16_t *dst, int width)
+{
+    /* In-place rescale of one luma line: min(x,30189)*19077 - 39057361, >>14. */
+    for (; width > 0; width--, dst++)
+        *dst = (FFMIN(*dst,30189)*19077 - 39057361)>>14;
+}
+static void RENAME(lumRangeFromJpeg)(int16_t *dst, int width)
+{
+    /* In-place rescale of one luma line: x*14071 + 33561947, >>14. */
+    for (; width > 0; width--, dst++)
+        *dst = (*dst*14071 + 33561947)>>14;
+}
+
+#define FAST_BILINEAR_X86 /* in: edi=src[xx], esi=src[xx+1], ecx=xalpha; out: esi */ \
+    "subl    %%edi, %%esi    \n\t" /*  src[xx+1] - src[xx] */                   \
+    "imull   %%ecx, %%esi    \n\t" /* (src[xx+1] - src[xx])*xalpha */           \
+    "shll      $16, %%edi    \n\t" /* src[xx]<<16 */                            \
+    "addl    %%edi, %%esi    \n\t" /* src[xx+1]*xalpha + src[xx]*(1-xalpha) */  \
+    "mov        %1, %%"REG_D"\n\t" /* REG_D = asm operand %1 of the user */     \
+    "shrl       $9, %%esi    \n\t" /* >>9: an 8-bit sample ends up times 128 */ \
+
+static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
+                                        long dstWidth, const uint8_t *src, int srcW,
+                                        int xInc)
+{ /* Fast bilinear horizontal scaling of one line of 8-bit samples (src) into dst; xInc is used as a 16.16 fixed-point source step. */
+#if COMPILE_TEMPLATE_MMX2
+    int32_t *filterPos = c->hLumFilterPos;
+    int16_t *filter    = c->hLumFilter;
+    int     canMMX2BeUsed  = c->canMMX2BeUsed;
+    void    *mmx2FilterCode= c->lumMmx2FilterCode;
+    int i;
+#if defined(PIC)
+    DECLARE_ALIGNED(8, uint64_t, ebxsave); // REG_b is saved here around the asm below
+#endif
+    if (canMMX2BeUsed) {
+        __asm__ volatile(
+#if defined(PIC)
+            "mov               %%"REG_b", %5        \n\t" // save REG_b in ebxsave
+#endif
+            "pxor                  %%mm7, %%mm7     \n\t"
+            "mov                      %0, %%"REG_c" \n\t" // src
+            "mov                      %1, %%"REG_D" \n\t" // dst
+            "mov                      %2, %%"REG_d" \n\t" // filter
+            "mov                      %3, %%"REG_b" \n\t" // filterPos
+            "xor               %%"REG_a", %%"REG_a" \n\t" // i
+            PREFETCH"        (%%"REG_c")            \n\t"
+            PREFETCH"      32(%%"REG_c")            \n\t"
+            PREFETCH"      64(%%"REG_c")            \n\t"
+
+#if ARCH_X86_64
+
+#define CALL_MMX2_FILTER_CODE \
+            "movl            (%%"REG_b"), %%esi     \n\t"\
+            "call                    *%4            \n\t"\
+            "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\
+            "add               %%"REG_S", %%"REG_c" \n\t"\
+            "add               %%"REG_a", %%"REG_D" \n\t"\
+            "xor               %%"REG_a", %%"REG_a" \n\t"\
+
+#else
+
+#define CALL_MMX2_FILTER_CODE \
+            "movl (%%"REG_b"), %%esi        \n\t"\
+            "call         *%4                       \n\t"\
+            "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
+            "add               %%"REG_a", %%"REG_D" \n\t"\
+            "xor               %%"REG_a", %%"REG_a" \n\t"\
+
+#endif /* ARCH_X86_64 */
+
+            CALL_MMX2_FILTER_CODE // 8 indirect calls through operand %4 (mmx2FilterCode)
+            CALL_MMX2_FILTER_CODE
+            CALL_MMX2_FILTER_CODE
+            CALL_MMX2_FILTER_CODE
+            CALL_MMX2_FILTER_CODE
+            CALL_MMX2_FILTER_CODE
+            CALL_MMX2_FILTER_CODE
+            CALL_MMX2_FILTER_CODE
+
+#if defined(PIC)
+            "mov                      %5, %%"REG_b" \n\t" // restore REG_b from ebxsave
+#endif
+            :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
+            "m" (mmx2FilterCode)
+#if defined(PIC)
+            ,"m" (ebxsave)
+#endif
+            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+#if !defined(PIC)
+            ,"%"REG_b
+#endif
+        );
+        for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; // outputs whose source position is >= srcW-1 get the last source sample times 128
+    } else {
+#endif /* COMPILE_TEMPLATE_MMX2 */
+    x86_reg xInc_shr16 = xInc >> 16; // integer part of the step
+    uint16_t xInc_mask = xInc & 0xffff; // fractional part of the step
+    x86_reg dstWidth_reg = dstWidth;
+    // No MMX here, just plain integer asm; each loop iteration writes two output samples
+    __asm__ volatile(
+        "xor %%"REG_a", %%"REG_a"            \n\t" // i
+        "xor %%"REG_d", %%"REG_d"            \n\t" // xx
+        "xorl    %%ecx, %%ecx                \n\t" // xalpha
+        ".p2align                4           \n\t"
+        "1:                                  \n\t"
+        "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
+        "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
+        FAST_BILINEAR_X86
+        "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t" // dst[i]
+        "addw       %4, %%cx                 \n\t" //xalpha += xInc&0xFFFF
+        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
+
+        "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
+        "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
+        FAST_BILINEAR_X86
+        "movw     %%si, 2(%%"REG_D", %%"REG_a", 2)  \n\t" // dst[i+1]
+        "addw       %4, %%cx                 \n\t" //xalpha += xInc&0xFFFF
+        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
+
+
+        "add        $2, %%"REG_a"            \n\t" // i += 2
+        "cmp        %2, %%"REG_a"            \n\t" // loop while i < dstWidth_reg (unsigned compare)
+        " jb        1b                       \n\t"
+
+
+        :: "r" (src), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask)
+        : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
+    );
+#if COMPILE_TEMPLATE_MMX2
+    } //if MMX2 can't be used
+#endif
+}
+
+      // *** horizontal scale Y line to temp buffer
+static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
+                                   const int16_t *hLumFilter,
+                                   const int16_t *hLumFilterPos, int hLumFilterSize,
+                                   uint8_t *formatConvBuffer,
+                                   uint32_t *pal, int isAlpha)
+{
+    /* Horizontally scale one source line into dst, using the luma hooks when isAlpha is 0 and the alpha hooks otherwise. */
+    void (*rangeConv)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
+    void (*inputConv)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
+    src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
+
+    if (inputConv) { // run the input converter into formatConvBuffer and scale from there
+        inputConv(formatConvBuffer, src, srcW, pal);
+        src = formatConvBuffer;
+    }
+
+    if (c->hScale16) {
+        c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+    } else if (c->hyscale_fast) { // fast bilinear path
+        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
+    } else { // filter-based scaler
+        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
+    }
+
+    if (rangeConv)
+        rangeConv(dst, dstWidth);
+}
+
+static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
+                                        long dstWidth, const uint8_t *src1,
+                                        const uint8_t *src2, int srcW, int xInc)
+{ /* Fast bilinear horizontal scaling of one line from each of src1 and src2; the src2 result is stored at the VOF/VOFW offset within dst. */
+#if COMPILE_TEMPLATE_MMX2
+    int32_t *filterPos = c->hChrFilterPos;
+    int16_t *filter    = c->hChrFilter;
+    int     canMMX2BeUsed  = c->canMMX2BeUsed;
+    void    *mmx2FilterCode= c->chrMmx2FilterCode;
+    int i;
+#if defined(PIC)
+    DECLARE_ALIGNED(8, uint64_t, ebxsave); // REG_b is saved here around the asm below
+#endif
+    if (canMMX2BeUsed) {
+        __asm__ volatile(
+#if defined(PIC)
+            "mov          %%"REG_b", %6         \n\t" // save REG_b in ebxsave
+#endif
+            "pxor             %%mm7, %%mm7      \n\t"
+            "mov                 %0, %%"REG_c"  \n\t" // src1
+            "mov                 %1, %%"REG_D"  \n\t" // dst
+            "mov                 %2, %%"REG_d"  \n\t" // filter
+            "mov                 %3, %%"REG_b"  \n\t" // filterPos
+            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
+            PREFETCH"   (%%"REG_c")             \n\t"
+            PREFETCH" 32(%%"REG_c")             \n\t"
+            PREFETCH" 64(%%"REG_c")             \n\t"
+
+            CALL_MMX2_FILTER_CODE // first plane: 4 indirect calls through operand %4 (mmx2FilterCode)
+            CALL_MMX2_FILTER_CODE
+            CALL_MMX2_FILTER_CODE
+            CALL_MMX2_FILTER_CODE
+            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
+            "mov                 %5, %%"REG_c"  \n\t" // src2
+            "mov                 %1, %%"REG_D"  \n\t" // dst
+            "add              $"AV_STRINGIFY(VOF)", %%"REG_D"  \n\t" // second plane's output starts VOF bytes into dst
+            PREFETCH"   (%%"REG_c")             \n\t"
+            PREFETCH" 32(%%"REG_c")             \n\t"
+            PREFETCH" 64(%%"REG_c")             \n\t"
+
+            CALL_MMX2_FILTER_CODE // second plane: 4 more indirect calls
+            CALL_MMX2_FILTER_CODE
+            CALL_MMX2_FILTER_CODE
+            CALL_MMX2_FILTER_CODE
+
+#if defined(PIC)
+            "mov %6, %%"REG_b"    \n\t" // restore REG_b from ebxsave
+#endif
+            :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
+            "m" (mmx2FilterCode), "m" (src2)
+#if defined(PIC)
+            ,"m" (ebxsave)
+#endif
+            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+#if !defined(PIC)
+            ,"%"REG_b
+#endif
+        );
+        for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { // outputs whose source position is >= srcW-1 get the last source sample times 128
+            //printf("%d %d %d\n", dstWidth, i, srcW);
+            dst[i] = src1[srcW-1]*128;
+            dst[i+VOFW] = src2[srcW-1]*128;
+        }
+    } else {
+#endif /* COMPILE_TEMPLATE_MMX2 */
+        x86_reg xInc_shr16 = (x86_reg) (xInc >> 16); // integer part of the step
+        uint16_t xInc_mask = xInc & 0xffff; // fractional part of the step
+        x86_reg dstWidth_reg = dstWidth;
+        __asm__ volatile(
+            "xor %%"REG_a", %%"REG_a"               \n\t" // i
+            "xor %%"REG_d", %%"REG_d"               \n\t" // xx
+            "xorl    %%ecx, %%ecx                   \n\t" // xalpha
+            ".p2align    4                          \n\t"
+            "1:                                     \n\t"
+            "mov        %0, %%"REG_S"               \n\t" // src1 (reloaded each iteration; esi is overwritten below)
+            "movzbl  (%%"REG_S", %%"REG_d"), %%edi  \n\t" //src[xx]
+            "movzbl 1(%%"REG_S", %%"REG_d"), %%esi  \n\t" //src[xx+1]
+            FAST_BILINEAR_X86
+            "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t" // dst[i]
+
+            "movzbl    (%5, %%"REG_d"), %%edi       \n\t" //src[xx]
+            "movzbl   1(%5, %%"REG_d"), %%esi       \n\t" //src[xx+1]
+            FAST_BILINEAR_X86
+            "movw     %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2)   \n\t" // same index, VOF bytes further into dst
+
+            "addw       %4, %%cx                    \n\t" //xalpha += xInc&0xFFFF
+            "adc        %3, %%"REG_d"               \n\t" //xx+= xInc>>16 + carry
+            "add        $1, %%"REG_a"               \n\t" // i++
+            "cmp        %2, %%"REG_a"               \n\t" // loop while i < dstWidth_reg (unsigned compare)
+            " jb        1b                          \n\t"
+
+/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
+which is needed to support GCC 4.0. */
+#if ARCH_X86_64 && AV_GCC_VERSION_AT_LEAST(3,4)
+            :: "m" (src1), "m" (dst), "g" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask),
+#else
+            :: "m" (src1), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask),
+#endif
+            "r" (src2)
+            : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
+        );
+#if COMPILE_TEMPLATE_MMX2
+    } //if MMX2 can't be used
+#endif
+}
+
+inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
+                                   int srcW, int xInc, const int16_t *hChrFilter,
+                                   const int16_t *hChrFilterPos, int hChrFilterSize,
+                                   uint8_t *formatConvBuffer,
+                                   uint32_t *pal)
+{
+    /* Horizontally scale one line of both chroma planes; the second plane's output goes VOFW entries after the first in dst. */
+    src1 += c->chrSrcOffset;
+    src2 += c->chrSrcOffset;
+
+    if (c->chrToYV12) { // run the input converter into formatConvBuffer and scale from there
+        uint8_t *const convBuf2 = formatConvBuffer + VOFW;
+        c->chrToYV12(formatConvBuffer, convBuf2, src1, src2, srcW, pal);
+        src1 = formatConvBuffer;
+        src2 = convBuf2;
+    }
+    if (c->hScale16) {
+        c->hScale16(dst     , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+        c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+    } else if (c->hcscale_fast) { // fast bilinear path
+        c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
+    } else { // filter-based scaler, one call per plane
+        c->hScale(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+    }
+
+    if (c->chrConvertRange)
+        c->chrConvertRange(dst, dstWidth);
+}
+
+#define DEBUG_SWSCALE_BUFFERS 0 /* set to non-zero to enable the DEBUG_BUFFERS() traces */
+#define DEBUG_BUFFERS(...) do { if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__); } while (0) /* do/while(0) keeps it a single statement, so a caller's else cannot bind to the inner if; logs through the caller's 'c' */
+
+static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
+                           int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    /* load a few things into local vars to make the code more readable? and faster */
+    const int srcW= c->srcW;
+    const int dstW= c->dstW;
+    const int dstH= c->dstH;
+    const int chrDstW= c->chrDstW;
+    const int chrSrcW= c->chrSrcW;
+    const int lumXInc= c->lumXInc;
+    const int chrXInc= c->chrXInc;
+    const enum PixelFormat dstFormat= c->dstFormat;
+    const int flags= c->flags;
+    int16_t *vLumFilterPos= c->vLumFilterPos;
+    int16_t *vChrFilterPos= c->vChrFilterPos;
+    int16_t *hLumFilterPos= c->hLumFilterPos;
+    int16_t *hChrFilterPos= c->hChrFilterPos;
+    int16_t *vLumFilter= c->vLumFilter;
+    int16_t *vChrFilter= c->vChrFilter;
+    int16_t *hLumFilter= c->hLumFilter;
+    int16_t *hChrFilter= c->hChrFilter;
+    int32_t *lumMmxFilter= c->lumMmxFilter;
+    int32_t *chrMmxFilter= c->chrMmxFilter;
+    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
+    const int vLumFilterSize= c->vLumFilterSize;
+    const int vChrFilterSize= c->vChrFilterSize;
+    const int hLumFilterSize= c->hLumFilterSize;
+    const int hChrFilterSize= c->hChrFilterSize;
+    int16_t **lumPixBuf= c->lumPixBuf;
+    int16_t **chrPixBuf= c->chrPixBuf;
+    int16_t **alpPixBuf= c->alpPixBuf;
+    const int vLumBufSize= c->vLumBufSize;
+    const int vChrBufSize= c->vChrBufSize;
+    uint8_t *formatConvBuffer= c->formatConvBuffer;
+    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
+    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
+    int lastDstY;
+    uint32_t *pal=c->pal_yuv;
+
+    /* vars which will change and which we need to store back in the context */
+    int dstY= c->dstY;
+    int lumBufIndex= c->lumBufIndex;
+    int chrBufIndex= c->chrBufIndex;
+    int lastInLumBuf= c->lastInLumBuf;
+    int lastInChrBuf= c->lastInChrBuf;
+
+    if (isPacked(c->srcFormat)) {
+        src[0]=
+        src[1]=
+        src[2]=
+        src[3]= src[0];
+        srcStride[0]=
+        srcStride[1]=
+        srcStride[2]=
+        srcStride[3]= srcStride[0];
+    }
+    srcStride[1]<<= c->vChrDrop;
+    srcStride[2]<<= c->vChrDrop;
+
+    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
+                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
+                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
+    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
+                   srcSliceY,    srcSliceH,    dstY,    dstH);
+    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
+                   vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
+
+    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
+        static int warnedAlready=0; //FIXME move this into the context perhaps
+        if (flags & SWS_PRINT_INFO && !warnedAlready) {
+            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
+                   "         ->cannot do aligned memory accesses anymore\n");
+            warnedAlready=1;
+        }
+    }
+
+    /* Note the user might start scaling the picture in the middle so this
+       will not get executed. This is not really intended but works
+       currently, so people might do it. */
+    if (srcSliceY ==0) {
+        lumBufIndex=-1;
+        chrBufIndex=-1;
+        dstY=0;
+        lastInLumBuf= -1;
+        lastInChrBuf= -1;
+    }
+
+    lastDstY= dstY;
+
+    for (;dstY < dstH; dstY++) {
+        unsigned char *dest =dst[0]+dstStride[0]*dstY;
+        const int chrDstY= dstY>>c->chrDstVSubSample;
+        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
+        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
+        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
+
+        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
+        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
+        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
+        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
+        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
+        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
+        int enough_lines;
+
+        //handle holes (FAST_BILINEAR & weird filters)
+        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
+        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
+        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
+        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
+
+        DEBUG_BUFFERS("dstY: %d\n", dstY);
+        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
+                         firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
+        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
+                         firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
+
+        // Do we have enough lines in this slice to output the dstY line
+        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
+
+        if (!enough_lines) {
+            lastLumSrcY = srcSliceY + srcSliceH - 1;
+            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
+            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
+                                            lastLumSrcY, lastChrSrcY);
+        }
+
+        //Do horizontal scaling
+        while(lastInLumBuf < lastLumSrcY) {
+            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
+            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
+            lumBufIndex++;
+            assert(lumBufIndex < 2*vLumBufSize);
+            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
+            assert(lastInLumBuf + 1 - srcSliceY >= 0);
+            RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
+                            hLumFilter, hLumFilterPos, hLumFilterSize,
+                            formatConvBuffer,
+                            pal, 0);
+            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
+                RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
+                                hLumFilter, hLumFilterPos, hLumFilterSize,
+                                formatConvBuffer,
+                                pal, 1);
+            lastInLumBuf++;
+            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
+                               lumBufIndex,    lastInLumBuf);
+        }
+        while(lastInChrBuf < lastChrSrcY) {
+            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
+            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
+            chrBufIndex++;
+            assert(chrBufIndex < 2*vChrBufSize);
+            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
+            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
+            //FIXME replace parameters through context struct (some at least)
+
+            if (c->needs_hcscale)
+                RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
+                                hChrFilter, hChrFilterPos, hChrFilterSize,
+                                formatConvBuffer,
+                                pal);
+            lastInChrBuf++;
+            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
+                               chrBufIndex,    lastInChrBuf);
+        }
+        //wrap buf index around to stay inside the ring buffer
+        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
+        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
+        if (!enough_lines)
+            break; //we can't output a dstY line so let's try with the next slice
+
+        c->blueDither= ff_dither8[dstY&1];
+        if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
+            c->greenDither= ff_dither8[dstY&1];
+        else
+            c->greenDither= ff_dither4[dstY&1];
+        c->redDither= ff_dither8[(dstY+1)&1];
+        if (dstY < dstH-2) {
+            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+            const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+            int i;
+            if (flags & SWS_ACCURATE_RND) {
+                int s= APCK_SIZE / 8;
+                for (i=0; i<vLumFilterSize; i+=2) {
+                    *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
+                    *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
+                              lumMmxFilter[s*i+APCK_COEF/4  ]=
+                              lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
+                        + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
+                    if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+                        *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
+                        *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
+                                  alpMmxFilter[s*i+APCK_COEF/4  ]=
+                                  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
+                    }
+                }
+                for (i=0; i<vChrFilterSize; i+=2) {
+                    *(const void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
+                    *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
+                              chrMmxFilter[s*i+APCK_COEF/4  ]=
+                              chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
+                        + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
+                }
+            } else {
+                for (i=0; i<vLumFilterSize; i++) {
+                    lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
+                    lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
+                    lumMmxFilter[4*i+2]=
+                    lumMmxFilter[4*i+3]=
+                        ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
+                    if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+                        alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
+                        alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
+                        alpMmxFilter[4*i+2]=
+                        alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
+                    }
+                }
+                for (i=0; i<vChrFilterSize; i++) {
+                    chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
+                    chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
+                    chrMmxFilter[4*i+2]=
+                    chrMmxFilter[4*i+3]=
+                        ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
+                }
+            }
+            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
+                c->yuv2nv12X(c,
+                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                             dest, uDest, dstW, chrDstW, dstFormat);
+            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+                if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
+                    yuv2yuvX16inC(
+                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                                  dstFormat);
+                } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
+                    const int16_t *lumBuf = lumSrcPtr[0];
+                    const int16_t *chrBuf= chrSrcPtr[0];
+                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
+                    c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
+                } else { //General YV12
+                    c->yuv2yuvX(c,
+                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
+                }
+            } else {
+                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
+                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
+                if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
+                    int chrAlpha= vChrFilter[2*dstY+1];
+                    if(flags & SWS_FULL_CHR_H_INT) {
+                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
+                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         alpSrcPtr, dest, dstW, dstY);
+                    } else {
+                        c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
+                                       alpPixBuf ? *alpSrcPtr : NULL,
+                                       dest, dstW, chrAlpha, dstFormat, flags, dstY);
+                    }
+                } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
+                    int lumAlpha= vLumFilter[2*dstY+1];
+                    int chrAlpha= vChrFilter[2*dstY+1];
+                    lumMmxFilter[2]=
+                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
+                    chrMmxFilter[2]=
+                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
+                    if(flags & SWS_FULL_CHR_H_INT) {
+                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
+                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         alpSrcPtr, dest, dstW, dstY);
+                    } else {
+                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
+                                       alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
+                                       dest, dstW, lumAlpha, chrAlpha, dstY);
+                    }
+                } else { //general RGB
+                    if(flags & SWS_FULL_CHR_H_INT) {
+                        yuv2rgbXinC_full(c,
+                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         alpSrcPtr, dest, dstW, dstY);
+                    } else {
+                        c->yuv2packedX(c,
+                                       vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                       vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                       alpSrcPtr, dest, dstW, dstY);
+                    }
+                }
+            }
+        } else { // hmm looks like we can't use MMX here without overwriting this array's tail
+            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+            const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
+                yuv2nv12XinC(
+                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                             dest, uDest, dstW, chrDstW, dstFormat);
+            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+                if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
+                    yuv2yuvX16inC(
+                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                                  dstFormat);
+                } else {
+                    yuv2yuvXinC(
+                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
+                }
+            } else {
+                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
+                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
+                if(flags & SWS_FULL_CHR_H_INT) {
+                    yuv2rgbXinC_full(c,
+                                     vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                     vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                     alpSrcPtr, dest, dstW, dstY);
+                } else {
+                    yuv2packedXinC(c,
+                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                   vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                   alpSrcPtr, dest, dstW, dstY);
+                }
+            }
+        }
+    }
+
+    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
+        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
+
+    if (flags & SWS_CPU_CAPS_MMX2 )  __asm__ volatile("sfence":::"memory");
+    /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
+    if (flags & SWS_CPU_CAPS_3DNOW)  __asm__ volatile("femms" :::"memory");
+    else                             __asm__ volatile("emms"  :::"memory");
+    /* store changed local vars back in the context */
+    c->dstY= dstY;
+    c->lumBufIndex= lumBufIndex;
+    c->chrBufIndex= chrBufIndex;
+    c->lastInLumBuf= lastInLumBuf;
+    c->lastInChrBuf= lastInChrBuf;
+
+    return dstY - lastDstY;
+}
+
+static void RENAME(sws_init_swScale)(SwsContext *c) // fills the function pointers of c with the RENAME()d routines of this template instance
+{
+    enum PixelFormat srcFormat = c->srcFormat;
+
+    c->yuv2nv12X    = RENAME(yuv2nv12X   ); // output writers; several are invoked through c-> by the scaling function above
+    c->yuv2yuv1     = RENAME(yuv2yuv1    );
+    c->yuv2yuvX     = RENAME(yuv2yuvX    );
+    c->yuv2packed1  = RENAME(yuv2packed1 );
+    c->yuv2packed2  = RENAME(yuv2packed2 );
+    c->yuv2packedX  = RENAME(yuv2packedX );
+
+    c->hScale       = RENAME(hScale      );
+
+    // Fast bilinear path: hyscale_fast/hcscale_fast are installed only if SWS_FAST_BILINEAR is requested and c->canMMX2BeUsed is set.
+    if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
+    { // NOTE(review): the original comment read "Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one)", which does not match this condition - confirm intent
+        c->hyscale_fast = RENAME(hyscale_fast);
+        c->hcscale_fast = RENAME(hcscale_fast);
+    } else { // no fast path: both pointers are explicitly reset to NULL
+        c->hyscale_fast = NULL;
+        c->hcscale_fast = NULL;
+    }
+
+    switch(srcFormat) { // chroma input reader for YUYV422/UYVY422, NV12/NV21 and the 16-bit YUV formats; any other format leaves c->chrToYV12 untouched here
+        case PIX_FMT_YUYV422  : c->chrToYV12 = RENAME(yuy2ToUV); break;
+        case PIX_FMT_UYVY422  : c->chrToYV12 = RENAME(uyvyToUV); break;
+        case PIX_FMT_NV12     : c->chrToYV12 = RENAME(nv12ToUV); break;
+        case PIX_FMT_NV21     : c->chrToYV12 = RENAME(nv21ToUV); break;
+        case PIX_FMT_YUV420P16BE:
+        case PIX_FMT_YUV422P16BE:
+        case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break;
+        case PIX_FMT_YUV420P16LE:
+        case PIX_FMT_YUV422P16LE:
+        case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break;
+        default: break;
+    }
+    if (c->chrSrcHSubSample) { // BGR24/RGB24 sources: the *_half chroma readers when chrSrcHSubSample is nonzero, the plain ones otherwise
+        switch(srcFormat) {
+        case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV_half); break;
+        case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV_half); break;
+        default: break;
+        }
+    } else {
+        switch(srcFormat) {
+        case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV); break;
+        case PIX_FMT_RGB24  : c->chrToYV12 = RENAME(rgb24ToUV); break;
+        default: break;
+        }
+    }
+
+    switch (srcFormat) { // luma input reader; other formats leave c->lumToYV12 untouched here
+    case PIX_FMT_YUYV422  :
+    case PIX_FMT_YUV420P16BE:
+    case PIX_FMT_YUV422P16BE:
+    case PIX_FMT_YUV444P16BE:
+    case PIX_FMT_Y400A    :
+    case PIX_FMT_GRAY16BE : c->lumToYV12 = RENAME(yuy2ToY); break; // shared by YUYV422, Y400A and the big-endian 16-bit formats (presumably one byte of each 2-byte pair is kept - confirm in yuy2ToY)
+    case PIX_FMT_UYVY422  :
+    case PIX_FMT_YUV420P16LE:
+    case PIX_FMT_YUV422P16LE:
+    case PIX_FMT_YUV444P16LE:
+    case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break; // shared by UYVY422 and the little-endian 16-bit formats
+    case PIX_FMT_BGR24    : c->lumToYV12 = RENAME(bgr24ToY); break;
+    case PIX_FMT_RGB24    : c->lumToYV12 = RENAME(rgb24ToY); break;
+    default: break;
+    }
+    if (c->alpPixBuf) { // an alpha reader is installed only when c->alpPixBuf is non-NULL, and only for Y400A
+        switch (srcFormat) {
+        case PIX_FMT_Y400A  : c->alpToYV12 = RENAME(yuy2ToY); break;
+        default: break;
+        }
+    }
+
+    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { // range converters only when the two ranges differ and the destination is not an RGB format
+        if (c->srcRange) { // nonzero srcRange selects the *FromJpeg converters, zero the *ToJpeg ones
+            c->lumConvertRange = RENAME(lumRangeFromJpeg);
+            c->chrConvertRange = RENAME(chrRangeFromJpeg);
+        } else {
+            c->lumConvertRange = RENAME(lumRangeToJpeg);
+            c->chrConvertRange = RENAME(chrRangeToJpeg);
+        }
+    }
+}
diff --git a/libswscale/x86/swscale_template.h b/libswscale/x86/swscale_template.h
new file mode 100644
index 0000000000..74e12c75af
--- /dev/null
+++ b/libswscale/x86/swscale_template.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef SWSCALE_X86_SWSCALE_TEMPLATE_H
+#define SWSCALE_X86_SWSCALE_TEMPLATE_H
+
+DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL; // 0xF8 in every byte (top 5 bits of each byte set)
+DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL; // 0xFC in every byte (top 6 bits of each byte set)
+DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL; // 16 in each 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL; // 2 in each 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL; // bm* byte masks: the name has one digit per byte, most significant byte first (1 = 0xFF, 0 = 0x00)
+DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
+DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
+DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { // two 64-bit rows; the bytes take the values 0..3
+        0x0103010301030103LL,
+        0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { // two 64-bit rows; the bytes take the values 0..6
+        0x0602060206020602LL,
+        0x0004000400040004LL,};
+
+DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL; // bits 0-4 of each 16-bit word (b/g/r presumably blue/green/red of 5-6-5 pixels - confirm)
+DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL; // bits 5-10 of each 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL; // bits 11-15 of each 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL; // bits 0-4 of each 16-bit word (presumably the 5-5-5 layout - confirm)
+DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL; // bits 5-9 of each 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL; // bits 10-14 of each 16-bit word
+
+DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL; // selects bytes 0, 3 and 6 (byte 0 = least significant)
+DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL; // selects bytes 1, 4 and 7
+DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL; // selects bytes 2 and 5
+
+#ifdef FAST_BGR2YV12 // two alternative definitions of the same three symbols; this branch uses much smaller 16-bit coefficient magnitudes
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
+#else
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
+#endif /* FAST_BGR2YV12 */
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL; // 0x10 (16) in every byte
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; // 0x80 (128) in every byte
+DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL; // 1 in each 16-bit word
+
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL; // the rgb24 pair below holds the same 16-bit values with 0x0C88 and 0x20DE exchanged
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
+
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = { // 2 rows of 4 packed coefficient quadwords; NOTE(review): row 0 is presumably for BGR24 and row 1 for RGB24 - confirm against the users of this table
+    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
+    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
+};
+
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
+
+#endif /* SWSCALE_X86_SWSCALE_TEMPLATE_H */

From ecf72542fac6d05d88efe4a7a474adb8ec291fbe Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Wed, 25 May 2011 09:37:25 +0200
Subject: [PATCH 258/830] vsrc_buffer: remove duplicated file description

---
 libavfilter/vsrc_buffer.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavfilter/vsrc_buffer.h b/libavfilter/vsrc_buffer.h
index 7e2526bea0..c717f3dae4 100644
--- a/libavfilter/vsrc_buffer.h
+++ b/libavfilter/vsrc_buffer.h
@@ -1,5 +1,4 @@
 /*
- * Memory buffer source filter
  * Copyright (c) 2008 Vitor Sessak
  *
  * This file is part of FFmpeg.
@@ -27,7 +26,6 @@
  * memory buffer source API for video
  */
 
-
 #include "avfilter.h"
 
 /**

From 06fd213eb608c23f58e290cfad6262b94e30fc74 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Wed, 25 May 2011 09:43:14 +0200
Subject: [PATCH 259/830] Do not reset channel_layout to 0.

The channel_layout may have been set by the demuxer.
---
 libavcodec/aacdec.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index c56c0af986..19d3b3678f 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -251,8 +251,6 @@ static av_cold int output_configure(AACContext *ac,
         }
 
         memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
-
-        avctx->channel_layout = 0;
     }
 
     avctx->channels = channels;

From a3da17730e5704555d62a7eb32c1f988f8efed66 Mon Sep 17 00:00:00 2001
From: John Stebbins <stebbins@jetheaddev.com>
Date: Wed, 25 May 2011 09:43:55 +0200
Subject: [PATCH 260/830] Fix end time of last chapter in
 compute_chapters_end().

---
 libavformat/utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index 52d8a2dc16..20b21f29db 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2153,7 +2153,7 @@ enum CodecID av_codec_get_id(const AVCodecTag * const *tags, unsigned int tag)
 static void compute_chapters_end(AVFormatContext *s)
 {
     unsigned int i, j;
-    int64_t max_time = s->duration + (s->start_time == AV_NOPTS_VALUE) ? 0 : s->start_time;
+    int64_t max_time = s->duration + ((s->start_time == AV_NOPTS_VALUE) ? 0 : s->start_time);
 
     for (i = 0; i < s->nb_chapters; i++)
         if (s->chapters[i]->end == AV_NOPTS_VALUE) {

From 39d983461a81766f06bf66c0be254f5bdd7b8eb5 Mon Sep 17 00:00:00 2001
From: Reuben Thomas <rrt@sc3d.org>
Date: Wed, 25 May 2011 11:15:56 +0200
Subject: [PATCH 261/830] Add reading of disc number to mov.c

---
 libavformat/mov.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 726ec1e4b1..e51116b304 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -79,15 +79,15 @@ typedef struct MOVParseTableEntry {
 
 static const MOVParseTableEntry mov_default_parse_table[];
 
-static int mov_metadata_trkn(MOVContext *c, AVIOContext *pb, unsigned len)
+static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb, unsigned len, const char *type)
 {
     char buf[16];
 
     avio_rb16(pb); // unknown
     snprintf(buf, sizeof(buf), "%d", avio_rb16(pb));
-    av_metadata_set2(&c->fc->metadata, "track", buf, 0);
+    av_metadata_set2(&c->fc->metadata, type, buf, 0);
 
-    avio_rb16(pb); // total tracks
+    avio_rb16(pb); // total tracks/discs
 
     return 0;
 }
@@ -138,7 +138,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     const char *key = NULL;
     uint16_t str_size, langcode = 0;
     uint32_t data_type = 0;
-    int (*parse)(MOVContext*, AVIOContext*, unsigned) = NULL;
+    int (*parse)(MOVContext*, AVIOContext*, unsigned, const char *) = NULL;
 
     switch (atom.type) {
     case MKTAG(0xa9,'n','a','m'): key = "title";     break;
@@ -164,7 +164,9 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     case MKTAG( 't','v','e','n'): key = "episode_id";break;
     case MKTAG( 't','v','n','n'): key = "network";   break;
     case MKTAG( 't','r','k','n'): key = "track";
-        parse = mov_metadata_trkn; break;
+        parse = mov_metadata_track_or_disc_number; break;
+    case MKTAG( 'd','i','s','k'): key = "disc";
+        parse = mov_metadata_track_or_disc_number; break;
     }
 
     if (c->itunes_metadata && atom.size > 8) {
@@ -199,7 +201,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     str_size = FFMIN3(sizeof(str)-1, str_size, atom.size);
 
     if (parse)
-        parse(c, pb, str_size);
+        parse(c, pb, str_size, key);
     else {
         if (data_type == 3 || (data_type == 0 && langcode < 0x800)) { // MAC Encoded
             mov_read_mac_string(c, pb, str_size, str, sizeof(str));

From 48df6a241532f0702fc4fd10ddcbfac435e4027c Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 17 May 2011 22:21:33 +0200
Subject: [PATCH 262/830] indeo3: add out-of-buffer write check

Prevent out-of-buffer writes. In particular fix smclocki32.avi.1.1
crash, trac issue #114, roundup issue #1482.
---
 libavcodec/indeo3.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/indeo3.c b/libavcodec/indeo3.c
index b74fcf7c22..8e55fbe443 100644
--- a/libavcodec/indeo3.c
+++ b/libavcodec/indeo3.c
@@ -213,6 +213,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
     int *width_tbl, width_tbl_arr[10];
     const signed char *ref_vectors;
     uint8_t *cur_frm_pos, *ref_frm_pos, *cp, *cp2;
+    uint8_t *cur_end = cur + width*height + width;
     uint32_t *cur_lp, *ref_lp;
     const uint32_t *correction_lp[2], *correctionloworder_lp[2], *correctionhighorder_lp[2];
     uint8_t *correction_type_sp[2];
@@ -359,6 +360,8 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
                             k = *buf1++;
                             cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2];
                             ref_lp = ((uint32_t *)ref_frm_pos) + width_tbl[lp2];
+                            if ((uint8_t *)cur_lp >= cur_end-3)
+                                break;
 
                             switch(correction_type_sp[0][k]) {
                             case 0:

From 6b5e1825406a8180c72863bf63964ae50a8499db Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 20 May 2011 00:59:47 +0200
Subject: [PATCH 263/830] ffmpeg: handle the case when get_filtered_frame()
 fails

---
 ffmpeg.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index af57fc9456..7f7e060856 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1692,7 +1692,8 @@ static int output_packet(AVInputStream *ist, int ist_index,
                 while (frame_available) {
                     AVRational ist_pts_tb;
                     if (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO && ost->output_video_filter)
-                        get_filtered_video_frame(ost->output_video_filter, &picture, &ost->picref, &ist_pts_tb);
+                        if (get_filtered_video_frame(ost->output_video_filter, &picture, &ost->picref, &ist_pts_tb) < 0)
+                            goto cont;
                     if (ost->picref)
                         ist->pts = av_rescale_q(ost->picref->pts, ist_pts_tb, AV_TIME_BASE_Q);
 #endif

From cf06e3e4dd8c1023fb1dcad905f3f77fdc1cf3fb Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 20 May 2011 01:00:59 +0200
Subject: [PATCH 264/830] vsrc_buffer: return an error code if no frames are
 available

Also decrease the log level of the corresponding message to WARNING,
since the error is not fatal.
---
 libavfilter/vsrc_buffer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vsrc_buffer.c b/libavfilter/vsrc_buffer.c
index 6a2fcbf36e..d1e6ffd57a 100644
--- a/libavfilter/vsrc_buffer.c
+++ b/libavfilter/vsrc_buffer.c
@@ -180,9 +180,9 @@ static int request_frame(AVFilterLink *link)
     BufferSourceContext *c = link->src->priv;
 
     if (!c->picref) {
-        av_log(link->src, AV_LOG_ERROR,
+        av_log(link->src, AV_LOG_WARNING,
                "request_frame() called with no available frame!\n");
-        //return -1;
+        return AVERROR(EINVAL);
     }
 
     avfilter_start_frame(link, avfilter_ref_buffer(c->picref, ~0));

From 3c2c52baecb6c8c2af62989462804af7765694c4 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 23 May 2011 13:13:50 +0200
Subject: [PATCH 265/830] eval: implement not() expression

---
 doc/eval.texi      | 8 +++-----
 libavutil/avutil.h | 2 +-
 libavutil/eval.c   | 8 +++++++-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/doc/eval.texi b/doc/eval.texi
index e07267bdfa..25c0ea6524 100644
--- a/doc/eval.texi
+++ b/doc/eval.texi
@@ -76,6 +76,9 @@ integer. For example, "trunc(-1.5)" is "-1.0".
 @item sqrt(expr)
 Compute the square root of @var{expr}. This is equivalent to
 "(@var{expr})^.5".
+
+@item not(expr)
+Return 1.0 if @var{expr} is zero, 0.0 otherwise.
 @end table
 
 Note that:
@@ -93,11 +96,6 @@ is equivalent to
 A*B + not(A)*C
 @end example
 
-When A evaluates to either 1 or 0, that is the same as
-@example
-A*B + eq(A,0)*C
-@end example
-
 In your C code, you can extend the list of unary and binary functions,
 and define recognized constants, so that they are available for your
 expressions.
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 09188f837e..ca3e029459 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -41,7 +41,7 @@
 
 #define LIBAVUTIL_VERSION_MAJOR 51
 #define LIBAVUTIL_VERSION_MINOR  2
-#define LIBAVUTIL_VERSION_MICRO  1
+#define LIBAVUTIL_VERSION_MICRO  2
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \
diff --git a/libavutil/eval.c b/libavutil/eval.c
index 9271fd6cbc..331221515e 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c
@@ -125,7 +125,7 @@ struct AVExpr {
         e_mod, e_max, e_min, e_eq, e_gt, e_gte,
         e_pow, e_mul, e_div, e_add,
         e_last, e_st, e_while, e_floor, e_ceil, e_trunc,
-        e_sqrt,
+        e_sqrt, e_not,
     } type;
     double value; // is sign in other types
     union {
@@ -153,6 +153,7 @@ static double eval_expr(Parser *p, AVExpr *e)
         case e_ceil :  return e->value * ceil (eval_expr(p, e->param[0]));
         case e_trunc:  return e->value * trunc(eval_expr(p, e->param[0]));
         case e_sqrt:   return e->value * sqrt (eval_expr(p, e->param[0]));
+        case e_not:    return e->value * (eval_expr(p, e->param[0]) == 0); // parentheses required: '*' binds tighter than '==', and the sign held in e->value must scale the 0/1 result, not the operand
         case e_while: {
             double d = NAN;
             while (eval_expr(p, e->param[0]))
@@ -288,6 +289,7 @@ static int parse_primary(AVExpr **e, Parser *p)
     else if (strmatch(next, "ceil"  )) d->type = e_ceil;
     else if (strmatch(next, "trunc" )) d->type = e_trunc;
     else if (strmatch(next, "sqrt"  )) d->type = e_sqrt;
+    else if (strmatch(next, "not"   )) d->type = e_not;
     else {
         for (i=0; p->func1_names && p->func1_names[i]; i++) {
             if (strmatch(next, p->func1_names[i])) {
@@ -456,6 +458,7 @@ static int verify_expr(AVExpr *e)
         case e_ceil:
         case e_trunc:
         case e_sqrt:
+        case e_not:
             return verify_expr(e->param[0]);
         default: return verify_expr(e->param[0]) && verify_expr(e->param[1]);
     }
@@ -637,6 +640,9 @@ int main(void)
         "ceil(-123.123)",
         "sqrt(1764)",
         "sqrt(-1)",
+        "not(1)",
+        "not(NAN)",
+        "not(0)",
         NULL
     };
 

From 07586b68a8a496e44c7c977599e1ec09d07fd57f Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 7 May 2011 02:06:25 +0200
Subject: [PATCH 266/830] lavfi: add select filter

Address trac issue #92.
---
 Changelog                |   1 +
 doc/filters.texi         | 116 +++++++++++++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/avfilter.h   |   2 +-
 libavfilter/vf_select.c  | 351 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 471 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_select.c

diff --git a/Changelog b/Changelog
index 734396dfde..c5bec6a53b 100644
--- a/Changelog
+++ b/Changelog
@@ -18,6 +18,7 @@ version <next>:
 - 9bit and 10bit H.264 decoding
 - 9bit and 10bit FFV1 encoding / decoding
 - split filter added
+- select filter added
 
 
 version 0.7_beta1:
diff --git a/doc/filters.texi b/doc/filters.texi
index 363a7f5584..18f46f96df 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -1163,6 +1163,122 @@ scale="trunc(3/2*iw/hsub)*hsub:trunc(3/2*ih/vsub)*vsub"
 scale='min(500\, iw*3/2):-1'
 @end example
 
+@section select
+Select frames to pass to the output.
+
+It accepts an expression as input, which is evaluated for each input
+frame. If the expression evaluates to a non-zero value, the frame
+is selected and passed to the output, otherwise it is discarded.
+
+The expression can contain the following constants:
+
+@table @option
+@item PI
+Greek PI
+
+@item PHI
+golden ratio
+
+@item E
+Euler number
+
+@item n
+the sequential number of the filtered frame, starting from 0
+
+@item selected_n
+the sequential number of the selected frame, starting from 0
+
+@item prev_selected_n
+the sequential number of the last selected frame, NAN if undefined
+
+@item TB
+timebase of the input timestamps
+
+@item pts
+the PTS (Presentation TimeStamp) of the filtered video frame,
+expressed in @var{TB} units, NAN if undefined
+
+@item t
+the PTS (Presentation TimeStamp) of the filtered video frame,
+expressed in seconds, NAN if undefined
+
+@item prev_pts
+the PTS of the previously filtered video frame, NAN if undefined
+
+@item prev_selected_pts
+the PTS of the last previously selected video frame, NAN if undefined
+
+@item prev_selected_t
+the time of the last previously selected video frame, NAN if undefined
+
+@item start_pts
+the PTS of the first video frame in the video, NAN if undefined
+
+@item start_t
+the time of the first video frame in the video, NAN if undefined
+
+@item pict_type
+the picture type of the filtered frame, can assume one of the following
+values:
+@table @option
+@item PICT_TYPE_I
+@item PICT_TYPE_P
+@item PICT_TYPE_B
+@item PICT_TYPE_S
+@item PICT_TYPE_SI
+@item PICT_TYPE_SP
+@item PICT_TYPE_BI
+@end table
+
+@item interlace_type
+the frame interlace type, can assume one of the following values:
+@table @option
+@item INTERLACE_TYPE_P
+the frame is progressive (not interlaced)
+@item INTERLACE_TYPE_T
+the frame is top-field-first
+@item INTERLACE_TYPE_B
+the frame is bottom-field-first
+@end table
+
+@item key
+1 if the filtered frame is a key-frame, 0 otherwise
+
+@item pos
+the position in the file of the filtered frame, -1 if the information
+is not available (e.g. for synthetic video)
+@end table
+
+The default value of the select expression is "1".
+
+Some examples follow:
+
+@example
+# select all frames in input
+select
+
+# the above is the same as:
+select=1
+
+# skip all frames:
+select=0
+
+# select only I-frames
+select='eq(pict_type\,PICT_TYPE_I)'
+
+# select one frame every 100
+select='not(mod(n\,100))'
+
+# select only frames contained in the 10-20 time interval
+select='gte(t\,10)*lte(t\,20)'
+
+# select only I frames contained in the 10-20 time interval
+select='gte(t\,10)*lte(t\,20)*eq(pict_type\,PICT_TYPE_I)'
+
+# select frames with a minimum distance of 10 seconds
+select='isnan(prev_selected_t)+gte(t-prev_selected_t\,10)'
+@end example
+
 @anchor{setdar}
 @section setdar
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 8130ee4d20..2324fb999e 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -46,6 +46,7 @@ OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o
 OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
 OBJS-$(CONFIG_PIXDESCTEST_FILTER)            += vf_pixdesctest.o
 OBJS-$(CONFIG_SCALE_FILTER)                  += vf_scale.o
+OBJS-$(CONFIG_SELECT_FILTER)                 += vf_select.o
 OBJS-$(CONFIG_SETDAR_FILTER)                 += vf_aspect.o
 OBJS-$(CONFIG_SETPTS_FILTER)                 += vf_setpts.o
 OBJS-$(CONFIG_SETSAR_FILTER)                 += vf_aspect.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 0b6487f540..5f1065f23f 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -62,6 +62,7 @@ void avfilter_register_all(void)
     REGISTER_FILTER (PAD,         pad,         vf);
     REGISTER_FILTER (PIXDESCTEST, pixdesctest, vf);
     REGISTER_FILTER (SCALE,       scale,       vf);
+    REGISTER_FILTER (SELECT,      select,      vf);
     REGISTER_FILTER (SETDAR,      setdar,      vf);
     REGISTER_FILTER (SETPTS,      setpts,      vf);
     REGISTER_FILTER (SETSAR,      setsar,      vf);
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index cee5bbc114..602b2437d9 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR 10
+#define LIBAVFILTER_VERSION_MINOR 11
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vf_select.c b/libavfilter/vf_select.c
new file mode 100644
index 0000000000..3d05167d13
--- /dev/null
+++ b/libavfilter/vf_select.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2011 Stefano Sabatini
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * filter for selecting which frame passes in the filterchain
+ */
+
+#include "libavutil/eval.h"
+#include "libavutil/fifo.h"
+#include "avfilter.h"
+
+static const char *var_names[] = { /* keep in the same order as enum var_name, which indexes var_values[] */
+    "E",                 ///< Euler number
+    "PHI",               ///< golden ratio
+    "PI",                ///< greek pi
+
+    "TB",                ///< timebase of the input link
+
+    "pts",               ///< PTS of the current frame, NAN if unknown
+    "start_pts",         ///< first PTS in the stream, expressed in TB units
+    "prev_pts",          ///< previous frame PTS
+    "prev_selected_pts", ///< previous selected frame PTS
+
+    "t",                 ///< PTS of the current frame, expressed in seconds
+    "start_t",           ///< first PTS in the stream, expressed in seconds
+    "prev_t",            ///< previous frame time
+    "prev_selected_t",   ///< previously selected time
+
+    "pict_type",         ///< the type of picture in the movie
+    "PICT_TYPE_I",       ///< PICT_TYPE_*: constants to compare pict_type against
+    "PICT_TYPE_P",
+    "PICT_TYPE_B",
+    "PICT_TYPE_S",
+    "PICT_TYPE_SI",
+    "PICT_TYPE_SP",
+    "PICT_TYPE_BI",
+
+    "interlace_type",    ///< the frame interlace type
+    "INTERLACE_TYPE_P",  ///< value of interlace_type for a non-interlaced frame
+    "INTERLACE_TYPE_T",  ///< value of interlace_type for an interlaced frame, top field first
+    "INTERLACE_TYPE_B",  ///< value of interlace_type for an interlaced frame, not top field first
+
+    "n",                 ///< frame number (starting from zero)
+    "selected_n",        ///< selected frame number (starting from zero)
+    "prev_selected_n",   ///< number of the last selected frame
+
+    "key",               ///< tell if the frame is a key frame
+    "pos",               ///< original position in the file of the frame, NAN if unknown
+
+    NULL
+};
+
+enum var_name { /* indices into SelectContext.var_values[], one per entry of var_names[] */
+    VAR_E,
+    VAR_PHI,
+    VAR_PI,
+
+    VAR_TB,
+
+    VAR_PTS,
+    VAR_START_PTS,
+    VAR_PREV_PTS,
+    VAR_PREV_SELECTED_PTS,
+
+    VAR_T,
+    VAR_START_T,
+    VAR_PREV_T,
+    VAR_PREV_SELECTED_T,
+
+    VAR_PICT_TYPE,
+    VAR_PICT_TYPE_I,
+    VAR_PICT_TYPE_P,
+    VAR_PICT_TYPE_B,
+    VAR_PICT_TYPE_S,
+    VAR_PICT_TYPE_SI,
+    VAR_PICT_TYPE_SP,
+    VAR_PICT_TYPE_BI,
+
+    VAR_INTERLACE_TYPE,
+    VAR_INTERLACE_TYPE_P,
+    VAR_INTERLACE_TYPE_T,
+    VAR_INTERLACE_TYPE_B,
+
+    VAR_N,
+    VAR_SELECTED_N,
+    VAR_PREV_SELECTED_N,
+
+    VAR_KEY,
+    VAR_POS,
+
+    VAR_VARS_NB          ///< number of variables, used as the size of var_values[]
+};
+
+#define FIFO_SIZE 8 ///< number of frame references the pending_frames FIFO is allocated for
+
+typedef struct {
+    AVExpr *expr;                 ///< parsed selection expression
+    double var_values[VAR_VARS_NB]; ///< values of the expression variables, indexed by enum var_name
+    double select;                ///< result of select_frame() for the current frame, non-zero if selected
+    int cache_frames;             ///< set by poll_frame() so that start_frame() queues selected frames
+    AVFifoBuffer *pending_frames; ///< FIFO buffer of video frames
+} SelectContext;
+
+static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
+{
+    SelectContext *select = ctx->priv;
+    const char *expr_str = args ? args : "1"; /* no args: select every frame; never NULL for "%s" */
+    int ret;
+
+    if ((ret = av_expr_parse(&select->expr, expr_str, var_names, NULL, NULL, NULL, NULL, 0, ctx)) < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Error while parsing expression '%s'\n", expr_str);
+        return ret;
+    }
+
+    select->pending_frames = av_fifo_alloc(FIFO_SIZE*sizeof(AVFilterBufferRef*));
+    if (!select->pending_frames) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to allocate pending frames buffer.\n");
+        return AVERROR(ENOMEM);
+    }
+    return 0;
+}
+
+#define INTERLACE_TYPE_P 0 ///< frame is not interlaced
+#define INTERLACE_TYPE_T 1 ///< interlaced frame, top field first
+#define INTERLACE_TYPE_B 2 ///< interlaced frame, not top field first
+
+static int config_input(AVFilterLink *inlink)
+{
+    SelectContext *select = inlink->dst->priv;
+    /* set the constants and reset the per-stream state read by the expression */
+    select->var_values[VAR_E]   = M_E;
+    select->var_values[VAR_PHI] = M_PHI;
+    select->var_values[VAR_PI]  = M_PI;
+    select->var_values[VAR_TB]  = av_q2d(inlink->time_base);
+    select->var_values[VAR_N]          = 0.0;
+    select->var_values[VAR_SELECTED_N] = 0.0;
+    /* no frame has been seen or selected yet */
+    select->var_values[VAR_PREV_PTS]          = NAN;
+    select->var_values[VAR_PREV_T]            = NAN;
+    select->var_values[VAR_PREV_SELECTED_PTS] = NAN;
+    select->var_values[VAR_PREV_SELECTED_T]   = NAN;
+    select->var_values[VAR_START_PTS]         = NAN;
+    select->var_values[VAR_START_T]           = NAN;
+    /* every PICT_TYPE_* name listed in var_names[] gets its value, S and BI included */
+    select->var_values[VAR_PICT_TYPE_I]  = AV_PICTURE_TYPE_I;
+    select->var_values[VAR_PICT_TYPE_P]  = AV_PICTURE_TYPE_P;
+    select->var_values[VAR_PICT_TYPE_B]  = AV_PICTURE_TYPE_B;
+    select->var_values[VAR_PICT_TYPE_S]  = AV_PICTURE_TYPE_S;
+    select->var_values[VAR_PICT_TYPE_SI] = AV_PICTURE_TYPE_SI;
+    select->var_values[VAR_PICT_TYPE_SP] = AV_PICTURE_TYPE_SP;
+    select->var_values[VAR_PICT_TYPE_BI] = AV_PICTURE_TYPE_BI;
+
+    select->var_values[VAR_INTERLACE_TYPE_P] = INTERLACE_TYPE_P;
+    select->var_values[VAR_INTERLACE_TYPE_T] = INTERLACE_TYPE_T;
+    select->var_values[VAR_INTERLACE_TYPE_B] = INTERLACE_TYPE_B;
+    return 0;
+}
+
+#define D2TS(d)  (isnan(d) ? AV_NOPTS_VALUE : (int64_t)(d))      /* double -> timestamp, NAN becomes AV_NOPTS_VALUE */
+#define TS2D(ts) ((ts) == AV_NOPTS_VALUE ? NAN : (double)(ts))  /* timestamp -> double, AV_NOPTS_VALUE becomes NAN */
+
+/** Evaluate the expression for picref and update the history; return non-zero if the frame is selected. */
+static int select_frame(AVFilterContext *ctx, AVFilterBufferRef *picref)
+{
+    SelectContext *select = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    double res, *v = select->var_values;
+    double pts = TS2D(picref->pts);               /* NAN when the frame has no PTS */
+    double t   = pts * av_q2d(inlink->time_base); /* stays NAN when pts is NAN */
+
+    if (isnan(v[VAR_START_PTS])) { /* the first frame with a known PTS defines the start */
+        v[VAR_START_PTS] = pts;
+        v[VAR_START_T]   = t;
+    }
+    v[VAR_PTS] = pts;
+    v[VAR_T]   = t;
+    v[VAR_POS] = picref->pos == -1 ? NAN : picref->pos;
+    v[VAR_KEY] = picref->video->key_frame;
+    v[VAR_INTERLACE_TYPE] =
+        !picref->video->interlaced     ? INTERLACE_TYPE_P :
+        picref->video->top_field_first ? INTERLACE_TYPE_T : INTERLACE_TYPE_B;
+    v[VAR_PICT_TYPE] = picref->video->pict_type;
+
+    res = av_expr_eval(select->expr, v, NULL);
+    av_log(inlink->dst, AV_LOG_DEBUG,
+           "n:%d pts:%d t:%f pos:%d interlace_type:%c key:%d pict_type:%c "
+           "-> select:%f\n",
+           (int)v[VAR_N], (int)v[VAR_PTS], v[VAR_T], (int)v[VAR_POS],
+           v[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_P ? 'P' :
+           v[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_T ? 'T' :
+           v[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_B ? 'B' : '?',
+           (int)v[VAR_KEY], av_get_picture_type_char(v[VAR_PICT_TYPE]), res);
+
+    /* update the history after evaluation, so prev_* and n refer to earlier frames while evaluating */
+    if (res) {
+        v[VAR_PREV_SELECTED_N]   = v[VAR_N];
+        v[VAR_PREV_SELECTED_PTS] = pts;
+        v[VAR_PREV_SELECTED_T]   = t;
+        v[VAR_SELECTED_N] += 1.0;
+    }
+    v[VAR_PREV_PTS] = pts;
+    v[VAR_PREV_T]   = t;
+    v[VAR_N] += 1.0;
+    return res != 0; /* any non-zero result selects; do not truncate e.g. 0.5 to 0 */
+}
+
+/** Evaluate the incoming frame, then forward it, queue it for poll_frame(), or leave it to be dropped. */
+static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref)
+{
+    SelectContext *select = inlink->dst->priv;
+
+    select->select = select_frame(inlink->dst, picref);
+    if (!select->select)
+        return;
+    if (!select->cache_frames) {
+        avfilter_start_frame(inlink->dst->outputs[0], avfilter_ref_buffer(picref, ~0));
+    } else if (av_fifo_space(select->pending_frames)) {
+        /* frame was requested through poll_frame: the FIFO keeps the reference */
+        av_fifo_generic_write(select->pending_frames, &picref, sizeof(picref), NULL);
+    } else {
+        av_log(inlink->dst, AV_LOG_ERROR,
+               "Buffering limit reached, cannot cache more frames\n");
+        select->select = 0; /* not queued: let end_frame() release it instead of leaking it */
+    }
+}
+
+static void draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir)
+{
+    SelectContext *select = inlink->dst->priv;
+    /* forward the slice only for a selected frame that is not being cached */
+    if (select->select && !select->cache_frames)
+        avfilter_draw_slice(inlink->dst->outputs[0], y, h, slice_dir);
+}
+
+static void end_frame(AVFilterLink *inlink)
+{
+    SelectContext *select = inlink->dst->priv;
+    AVFilterBufferRef *picref = inlink->cur_buf;
+    /* selected frames are either kept for later or ended on the output now */
+    if (select->select) {
+        if (select->cache_frames)
+            return; /* start_frame() queued this reference, so it must not be released here */
+        avfilter_end_frame(inlink->dst->outputs[0]);
+    }
+    avfilter_unref_buffer(picref); /* reached for discarded frames and for directly forwarded ones */
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    SelectContext *select = ctx->priv;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    select->select = 0;
+    /* a frame cached by poll_frame() is sent first, as one full-height slice */
+    if (av_fifo_size(select->pending_frames)) {
+        AVFilterBufferRef *picref;
+        av_fifo_generic_read(select->pending_frames, &picref, sizeof(picref), NULL);
+        avfilter_start_frame(outlink, avfilter_ref_buffer(picref, ~0));
+        avfilter_draw_slice(outlink, 0, outlink->h, 1);
+        avfilter_end_frame(outlink);
+        avfilter_unref_buffer(picref);
+        return 0;
+    }
+    /* otherwise pull input frames until start_frame() marks one as selected */
+    while (!select->select) {
+        int ret = avfilter_request_frame(inlink);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+static int poll_frame(AVFilterLink *outlink)
+{
+    SelectContext *select = outlink->src->priv;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    int count, ret;
+    /* only refill when nothing is cached; a non-positive input poll result is returned as is */
+    if (!av_fifo_size(select->pending_frames)) {
+        if ((count = avfilter_poll_frame(inlink)) <= 0)
+            return count;
+        /* request frame from input, and apply select condition to it */
+        select->cache_frames = 1;
+        while (count-- && av_fifo_space(select->pending_frames)) {
+            ret = avfilter_request_frame(inlink);
+            if (ret < 0)
+                break;
+        }
+        select->cache_frames = 0;
+    }
+    /* the FIFO stores one pointer per frame, so this is the number of cached frames */
+    return av_fifo_size(select->pending_frames)/sizeof(AVFilterBufferRef *);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SelectContext *select = ctx->priv;
+    AVFilterBufferRef *picref;
+
+    av_expr_free(select->expr);
+    select->expr = NULL;
+    /* the FIFO is NULL if init() failed early; drain until empty so every queued reference is released */
+    while (select->pending_frames && av_fifo_size(select->pending_frames) >= sizeof(picref)) {
+        av_fifo_generic_read(select->pending_frames, &picref, sizeof(picref), NULL);
+        avfilter_unref_buffer(picref);
+    }
+    av_fifo_free(select->pending_frames);
+    select->pending_frames = NULL;
+}
+
+AVFilter avfilter_vf_select = {
+    .name      = "select",
+    .description = NULL_IF_CONFIG_SMALL("Select frames to pass in output."),
+    .init      = init,
+    .uninit    = uninit,
+
+    .priv_size = sizeof(SelectContext),
+    /* one video input, whose frame callbacks apply the selection, and one video output */
+    .inputs    = (AVFilterPad[]) {{ .name             = "default",
+                                    .type             = AVMEDIA_TYPE_VIDEO,
+                                    .get_video_buffer = avfilter_null_get_video_buffer,
+                                    .config_props     = config_input,
+                                    .start_frame      = start_frame,
+                                    .draw_slice       = draw_slice,
+                                    .end_frame        = end_frame },
+                                  { .name = NULL }},
+    .outputs   = (AVFilterPad[]) {{ .name             = "default",
+                                    .type             = AVMEDIA_TYPE_VIDEO,
+                                    .poll_frame       = poll_frame,
+                                    .request_frame    = request_frame, },
+                                  { .name = NULL}},
+};

From 5dc8214420e4c48cdf48455329138db752d15cd0 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 07:27:19 +0200
Subject: [PATCH 267/830] lavf: add priv_class field to AVInputFormat.

---
 libavformat/avformat.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 3d4cc68f89..c29f9baf4d 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -425,6 +425,8 @@ typedef struct AVInputFormat {
     const AVMetadataConv *metadata_conv;
 #endif
 
+    const AVClass *priv_class; ///< AVClass for the private context
+
     /* private fields */
     struct AVInputFormat *next;
 } AVInputFormat;

From 17a5556db598ba1e5550d89ae3e339c1161a218a Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 30 Apr 2011 13:42:55 +0200
Subject: [PATCH 268/830] mpegts: add compute_pcr option.

Deprecate the corresponding AVFormatParameters field.
---
 libavformat/avformat.h |  7 ++++---
 libavformat/mpegts.c   | 22 +++++++++++++++++++++-
 libavformat/version.h  |  3 +++
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index c29f9baf4d..7e8cd80aee 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -236,9 +236,10 @@ typedef struct AVFormatParameters {
     int channel; /**< Used to select DV channel. */
     const char *standard; /**< TV standard, NTSC, PAL, SECAM */
     unsigned int mpeg2ts_raw:1;  /**< Force raw MPEG-2 transport stream output, if possible. */
-    unsigned int mpeg2ts_compute_pcr:1; /**< Compute exact PCR for each transport
-                                            stream packet (only meaningful if
-                                            mpeg2ts_raw is TRUE). */
+#if FF_API_FORMAT_PARAMETERS
+    /**< deprecated, use mpegtsraw demuxer-specific options instead */
+    attribute_deprecated unsigned int mpeg2ts_compute_pcr:1;
+#endif
     unsigned int initial_pause:1;       /**< Do not begin to play the stream
                                             immediately (RTSP only). */
     unsigned int prealloced_context:1;
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index e20d6966e3..20528c5bc4 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -25,6 +25,8 @@
 
 #include "libavutil/crc.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
 #include "libavcodec/bytestream.h"
 #include "avformat.h"
 #include "mpegts.h"
@@ -86,6 +88,7 @@ struct Program {
 };
 
 struct MpegTSContext {
+    const AVClass *class;
     /* user data */
     AVFormatContext *stream;
     /** raw packet size, including FEC if present            */
@@ -122,6 +125,19 @@ struct MpegTSContext {
     MpegTSFilter *pids[NB_PID_MAX];
 };
 
+static const AVOption options[] = {
+    {"compute_pcr", "Compute exact PCR for each transport stream packet.", offsetof(MpegTSContext, mpeg2ts_compute_pcr), FF_OPT_TYPE_INT,
+     {.dbl = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+static const AVClass mpegtsraw_class = {
+    .class_name = "mpegtsraw demuxer",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 /* TS stream handling */
 
 enum MpegTSState {
@@ -1455,7 +1471,10 @@ static int mpegts_read_header(AVFormatContext *s,
     int64_t pos;
 
     if (ap) {
-        ts->mpeg2ts_compute_pcr = ap->mpeg2ts_compute_pcr;
+#if FF_API_FORMAT_PARAMETERS
+        if (ap->mpeg2ts_compute_pcr)
+            ts->mpeg2ts_compute_pcr = ap->mpeg2ts_compute_pcr;
+#endif
         if(ap->mpeg2ts_raw){
             av_log(s, AV_LOG_ERROR, "use mpegtsraw_demuxer!\n");
             return -1;
@@ -1878,4 +1897,5 @@ AVInputFormat ff_mpegtsraw_demuxer = {
 #ifdef USE_SYNCPOINT_SEARCH
     .read_seek2 = read_seek2,
 #endif
+    .priv_class = &mpegtsraw_class,
 };
diff --git a/libavformat/version.h b/libavformat/version.h
index 63f419125b..b4fdb6f8b5 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -68,5 +68,8 @@
 #ifndef FF_API_SDP_CREATE
 #define FF_API_SDP_CREATE              (LIBAVFORMAT_VERSION_MAJOR < 54)
 #endif
+#ifndef FF_API_FORMAT_PARAMETERS
+#define FF_API_FORMAT_PARAMETERS       (LIBAVFORMAT_VERSION_MAJOR < 54)
+#endif
 
 #endif /* AVFORMAT_VERSION_H */

From fb37d573473d615cc323635fa5bc58fe16d14ee4 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 21:06:14 +0200
Subject: [PATCH 269/830] lavf: deprecate AVFormatParameters.mpeg2ts_raw.

It doesn't do anything except produce an error message when set.
---
 libavformat/avformat.h | 2 +-
 libavformat/mpegts.c   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 7e8cd80aee..35e6c15f81 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -235,8 +235,8 @@ typedef struct AVFormatParameters {
     enum PixelFormat pix_fmt;
     int channel; /**< Used to select DV channel. */
     const char *standard; /**< TV standard, NTSC, PAL, SECAM */
-    unsigned int mpeg2ts_raw:1;  /**< Force raw MPEG-2 transport stream output, if possible. */
 #if FF_API_FORMAT_PARAMETERS
+    attribute_deprecated unsigned int mpeg2ts_raw:1;  /**< deprecated, use mpegtsraw demuxer */
     /**< deprecated, use mpegtsraw demuxer-specific options instead */
     attribute_deprecated unsigned int mpeg2ts_compute_pcr:1;
 #endif
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 20528c5bc4..7c0f3818d5 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -1470,16 +1470,16 @@ static int mpegts_read_header(AVFormatContext *s,
     int len;
     int64_t pos;
 
-    if (ap) {
 #if FF_API_FORMAT_PARAMETERS
+    if (ap) {
         if (ap->mpeg2ts_compute_pcr)
             ts->mpeg2ts_compute_pcr = ap->mpeg2ts_compute_pcr;
-#endif
         if(ap->mpeg2ts_raw){
             av_log(s, AV_LOG_ERROR, "use mpegtsraw_demuxer!\n");
             return -1;
         }
     }
+#endif
 
     /* read the first 1024 bytes to get packet size */
     pos = avio_tell(pb);

From a1a15a9993d45b2ec69e370ea1687dd7cb2b5794 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 19:02:08 +0200
Subject: [PATCH 270/830] sndio: add channels and sample_rate private options.

---
 libavdevice/sndio_common.h |  2 ++
 libavdevice/sndio_dec.c    | 24 +++++++++++++++++++-----
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/libavdevice/sndio_common.h b/libavdevice/sndio_common.h
index 41c984ba79..e23b96d146 100644
--- a/libavdevice/sndio_common.h
+++ b/libavdevice/sndio_common.h
@@ -26,8 +26,10 @@
 #include <sndio.h>
 
 #include "libavformat/avformat.h"
+#include "libavutil/log.h"
 
 typedef struct {
+    AVClass *class;
     struct sio_hdl *hdl;
     enum CodecID codec_id;
     int64_t hwpos;
diff --git a/libavdevice/sndio_dec.c b/libavdevice/sndio_dec.c
index ff2adeb0af..abe13e1359 100644
--- a/libavdevice/sndio_dec.c
+++ b/libavdevice/sndio_dec.c
@@ -23,6 +23,7 @@
 #include <sndio.h>
 
 #include "libavformat/avformat.h"
+#include "libavutil/opt.h"
 
 #include "sndio_common.h"
 
@@ -33,16 +34,15 @@ static av_cold int audio_read_header(AVFormatContext *s1,
     AVStream *st;
     int ret;
 
-    if (ap->sample_rate <= 0 || ap->channels <= 0)
-        return AVERROR(EINVAL);
+    if (ap->sample_rate > 0)
+        s->sample_rate = ap->sample_rate;
+    if (ap->channels > 0)
+        s->channels = ap->channels;
 
     st = av_new_stream(s1, 0);
     if (!st)
         return AVERROR(ENOMEM);
 
-    s->sample_rate = ap->sample_rate;
-    s->channels    = ap->channels;
-
     ret = ff_sndio_open(s1, 0, s1->filename);
     if (ret < 0)
         return ret;
@@ -97,6 +97,19 @@ static av_cold int audio_read_close(AVFormatContext *s1)
     return 0;
 }
 
+static const AVOption options[] = {
+    { "sample_rate", "", offsetof(SndioData, sample_rate), FF_OPT_TYPE_INT, {.dbl = 48000}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { "channels",    "", offsetof(SndioData, channels),    FF_OPT_TYPE_INT, {.dbl = 2},     1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+static const AVClass sndio_demuxer_class = {
+    .class_name     = "sndio indev",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_sndio_demuxer = {
     .name           = "sndio",
     .long_name      = NULL_IF_CONFIG_SMALL("sndio audio capture"),
@@ -105,4 +118,5 @@ AVInputFormat ff_sndio_demuxer = {
     .read_packet    = audio_read_packet,
     .read_close     = audio_read_close,
     .flags          = AVFMT_NOFILE,
+    .priv_class     = &sndio_demuxer_class,
 };

From 003e63b6df7ba08cce18c1830df9c49dd3f0f391 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 19:02:40 +0200
Subject: [PATCH 271/830] oss: add channels and sample_rate private options.

---
 libavdevice/oss_audio.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/libavdevice/oss_audio.c b/libavdevice/oss_audio.c
index 7c4a65dda2..bcd6540021 100644
--- a/libavdevice/oss_audio.c
+++ b/libavdevice/oss_audio.c
@@ -37,12 +37,14 @@
 #include <sys/select.h>
 
 #include "libavutil/log.h"
+#include "libavutil/opt.h"
 #include "libavcodec/avcodec.h"
 #include "libavformat/avformat.h"
 
 #define AUDIO_BLOCK_SIZE 4096
 
 typedef struct {
+    AVClass *class;
     int fd;
     int sample_rate;
     int channels;
@@ -216,15 +218,15 @@ static int audio_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     AVStream *st;
     int ret;
 
-    if (ap->sample_rate <= 0 || ap->channels <= 0)
-        return -1;
+    if (ap->sample_rate > 0)
+        s->sample_rate = ap->sample_rate;
+    if (ap->channels > 0)
+        s->channels = ap->channels;
 
     st = av_new_stream(s1, 0);
     if (!st) {
         return AVERROR(ENOMEM);
     }
-    s->sample_rate = ap->sample_rate;
-    s->channels = ap->channels;
 
     ret = audio_open(s1, 0, s1->filename);
     if (ret < 0) {
@@ -293,6 +295,19 @@ static int audio_read_close(AVFormatContext *s1)
 }
 
 #if CONFIG_OSS_INDEV
+static const AVOption options[] = {
+    { "sample_rate", "", offsetof(AudioData, sample_rate), FF_OPT_TYPE_INT, {.dbl = 48000}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { "channels",    "", offsetof(AudioData, channels),    FF_OPT_TYPE_INT, {.dbl = 2},     1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+static const AVClass oss_demuxer_class = {
+    .class_name     = "OSS demuxer",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_oss_demuxer = {
     "oss",
     NULL_IF_CONFIG_SMALL("Open Sound System capture"),
@@ -302,6 +317,7 @@ AVInputFormat ff_oss_demuxer = {
     audio_read_packet,
     audio_read_close,
     .flags = AVFMT_NOFILE,
+    .priv_class = &oss_demuxer_class,
 };
 #endif
 

From 2ea8faf39ff6f21c2faaf8f9bd060a6636ea65fc Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 19:03:10 +0200
Subject: [PATCH 272/830] ALSA: add channels and sample_rate private options.

---
 libavdevice/alsa-audio-dec.c | 37 +++++++++++++++++++++---------------
 libavdevice/alsa-audio.h     |  4 ++++
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/libavdevice/alsa-audio-dec.c b/libavdevice/alsa-audio-dec.c
index c467fc097f..285d338ff5 100644
--- a/libavdevice/alsa-audio-dec.c
+++ b/libavdevice/alsa-audio-dec.c
@@ -47,6 +47,7 @@
 
 #include <alsa/asoundlib.h>
 #include "libavformat/avformat.h"
+#include "libavutil/opt.h"
 
 #include "alsa-audio.h"
 
@@ -56,21 +57,14 @@ static av_cold int audio_read_header(AVFormatContext *s1,
     AlsaData *s = s1->priv_data;
     AVStream *st;
     int ret;
-    unsigned int sample_rate;
     enum CodecID codec_id;
     snd_pcm_sw_params_t *sw_params;
 
-    if (ap->sample_rate <= 0) {
-        av_log(s1, AV_LOG_ERROR, "Bad sample rate %d\n", ap->sample_rate);
+    if (ap->sample_rate > 0)
+        s->sample_rate = ap->sample_rate;
 
-        return AVERROR(EIO);
-    }
-
-    if (ap->channels <= 0) {
-        av_log(s1, AV_LOG_ERROR, "Bad channels number %d\n", ap->channels);
-
-        return AVERROR(EIO);
-    }
+    if (ap->channels > 0)
+        s->channels = ap->channels;
 
     st = av_new_stream(s1, 0);
     if (!st) {
@@ -78,10 +72,9 @@ static av_cold int audio_read_header(AVFormatContext *s1,
 
         return AVERROR(ENOMEM);
     }
-    sample_rate = ap->sample_rate;
     codec_id    = s1->audio_codec_id;
 
-    ret = ff_alsa_open(s1, SND_PCM_STREAM_CAPTURE, &sample_rate, ap->channels,
+    ret = ff_alsa_open(s1, SND_PCM_STREAM_CAPTURE, &s->sample_rate, s->channels,
         &codec_id);
     if (ret < 0) {
         return AVERROR(EIO);
@@ -113,8 +106,8 @@ static av_cold int audio_read_header(AVFormatContext *s1,
     /* take real parameters */
     st->codec->codec_type  = AVMEDIA_TYPE_AUDIO;
     st->codec->codec_id    = codec_id;
-    st->codec->sample_rate = sample_rate;
-    st->codec->channels    = ap->channels;
+    st->codec->sample_rate = s->sample_rate;
+    st->codec->channels    = s->channels;
     av_set_pts_info(st, 64, 1, 1000000);  /* 64 bits pts in us */
 
     return 0;
@@ -163,6 +156,19 @@ static int audio_read_packet(AVFormatContext *s1, AVPacket *pkt)
     return 0;
 }
 
+static const AVOption options[] = {
+    { "sample_rate", "", offsetof(AlsaData, sample_rate), FF_OPT_TYPE_INT, {.dbl = 48000}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { "channels",    "", offsetof(AlsaData, channels),    FF_OPT_TYPE_INT, {.dbl = 2},     1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+static const AVClass alsa_demuxer_class = {
+    .class_name     = "ALSA demuxer",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_alsa_demuxer = {
     "alsa",
     NULL_IF_CONFIG_SMALL("ALSA audio input"),
@@ -172,4 +178,5 @@ AVInputFormat ff_alsa_demuxer = {
     audio_read_packet,
     ff_alsa_close,
     .flags = AVFMT_NOFILE,
+    .priv_class = &alsa_demuxer_class,
 };
diff --git a/libavdevice/alsa-audio.h b/libavdevice/alsa-audio.h
index 7a1b01811b..32c07426ef 100644
--- a/libavdevice/alsa-audio.h
+++ b/libavdevice/alsa-audio.h
@@ -33,6 +33,7 @@
 #include <alsa/asoundlib.h>
 #include "config.h"
 #include "libavformat/avformat.h"
+#include "libavutil/log.h"
 
 /* XXX: we make the assumption that the soundcard accepts this format */
 /* XXX: find better solution with "preinit" method, needed also in
@@ -40,9 +41,12 @@
 #define DEFAULT_CODEC_ID AV_NE(CODEC_ID_PCM_S16BE, CODEC_ID_PCM_S16LE)
 
 typedef struct {
+    AVClass *class;
     snd_pcm_t *h;
     int frame_size;  ///< preferred size for reads and writes
     int period_size; ///< bytes per sample * channels
+    int sample_rate; ///< sample rate set by user
+    int channels;    ///< number of channels set by user
 } AlsaData;
 
 /**

From 5b3865fc5f9675dd55f7dd7d5ae3b9ed58c3310c Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 20:05:55 +0200
Subject: [PATCH 273/830] rawdec: add sample_rate/channels private options.

---
 libavformat/pcmdec.c |  3 ++-
 libavformat/rawdec.c | 36 +++++++++++++++++++++++++++++++-----
 libavformat/rawdec.h |  9 +++++++++
 3 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/libavformat/pcmdec.c b/libavformat/pcmdec.c
index a5b82789d6..343bbf0ae2 100644
--- a/libavformat/pcmdec.c
+++ b/libavformat/pcmdec.c
@@ -50,7 +50,7 @@ static int raw_read_packet(AVFormatContext *s, AVPacket *pkt)
 AVInputFormat ff_pcm_ ## name ## _demuxer = {\
     #name,\
     NULL_IF_CONFIG_SMALL(long_name),\
-    0,\
+    sizeof(RawAudioDemuxerContext),\
     NULL,\
     ff_raw_read_header,\
     raw_read_packet,\
@@ -59,6 +59,7 @@ AVInputFormat ff_pcm_ ## name ## _demuxer = {\
     .flags= AVFMT_GENERIC_INDEX,\
     .extensions = ext,\
     .value = codec,\
+    .priv_class = &ff_rawaudio_demuxer_class,\
 };
 
 PCMDEF(f64be, "PCM 64 bit floating-point big-endian format",
diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index 7df63a128c..81a6459307 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -23,6 +23,7 @@
 #include "avformat.h"
 #include "avio_internal.h"
 #include "rawdec.h"
+#include "libavutil/opt.h"
 
 /* raw input */
 int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
@@ -43,15 +44,26 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
         st->codec->codec_id = id;
 
         switch(st->codec->codec_type) {
-        case AVMEDIA_TYPE_AUDIO:
-            st->codec->sample_rate = ap->sample_rate;
-            if(ap->channels) st->codec->channels = ap->channels;
-            else             st->codec->channels = 1;
+        case AVMEDIA_TYPE_AUDIO: {
+            RawAudioDemuxerContext *s1 = s->priv_data;
+
+            if (ap->sample_rate)
+                st->codec->sample_rate = ap->sample_rate;
+            if (ap->channels)
+                st->codec->channels    = ap->channels;
+            else st->codec->channels   = 1;
+
+            if (s1->sample_rate)
+                st->codec->sample_rate = s1->sample_rate;
+            if (s1->channels)
+                st->codec->channels    = s1->channels;
+
             st->codec->bits_per_coded_sample = av_get_bits_per_sample(st->codec->codec_id);
             assert(st->codec->bits_per_coded_sample > 0);
             st->codec->block_align = st->codec->bits_per_coded_sample*st->codec->channels/8;
             av_set_pts_info(st, 64, 1, st->codec->sample_rate);
             break;
+            }
         case AVMEDIA_TYPE_VIDEO:
             if(ap->time_base.num)
                 av_set_pts_info(st, 64, ap->time_base.num, ap->time_base.den);
@@ -138,17 +150,31 @@ int ff_raw_video_read_header(AVFormatContext *s,
 
 /* Note: Do not forget to add new entries to the Makefile as well. */
 
+static const AVOption audio_options[] = {
+    { "sample_rate", "", offsetof(RawAudioDemuxerContext, sample_rate), FF_OPT_TYPE_INT, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { "channels",    "", offsetof(RawAudioDemuxerContext, channels),    FF_OPT_TYPE_INT, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+const AVClass ff_rawaudio_demuxer_class = {
+    .class_name     = "rawaudio demuxer",
+    .item_name      = av_default_item_name,
+    .option         = audio_options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
 #if CONFIG_G722_DEMUXER
 AVInputFormat ff_g722_demuxer = {
     "g722",
     NULL_IF_CONFIG_SMALL("raw G.722"),
-    0,
+    sizeof(RawAudioDemuxerContext),
     NULL,
     ff_raw_read_header,
     ff_raw_read_partial_packet,
     .flags= AVFMT_GENERIC_INDEX,
     .extensions = "g722,722",
     .value = CODEC_ID_ADPCM_G722,
+    .priv_class = &ff_rawaudio_demuxer_class,
 };
 #endif
 
diff --git a/libavformat/rawdec.h b/libavformat/rawdec.h
index 82672b7b53..e473eb2aac 100644
--- a/libavformat/rawdec.h
+++ b/libavformat/rawdec.h
@@ -23,6 +23,15 @@
 #define AVFORMAT_RAWDEC_H
 
 #include "avformat.h"
+#include "libavutil/log.h"
+
+typedef struct RawAudioDemuxerContext {
+    AVClass *class;
+    int sample_rate;
+    int channels;
+} RawAudioDemuxerContext;
+
+extern const AVClass ff_rawaudio_demuxer_class;
 
 int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap);
 

From bffd4dd1d36b1e9b9479c81b370c134ffb434e1a Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 20:13:28 +0200
Subject: [PATCH 274/830] lavf: deprecate
 AVFormatParameters.{channels,sample_rate}.

---
 libavdevice/alsa-audio-dec.c | 2 ++
 libavdevice/oss_audio.c      | 2 ++
 libavdevice/sndio_dec.c      | 2 ++
 libavformat/avformat.h       | 6 ++++--
 libavformat/rawdec.c         | 2 ++
 5 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/libavdevice/alsa-audio-dec.c b/libavdevice/alsa-audio-dec.c
index 285d338ff5..937f6a6ef7 100644
--- a/libavdevice/alsa-audio-dec.c
+++ b/libavdevice/alsa-audio-dec.c
@@ -60,11 +60,13 @@ static av_cold int audio_read_header(AVFormatContext *s1,
     enum CodecID codec_id;
     snd_pcm_sw_params_t *sw_params;
 
+#if FF_API_FORMAT_PARAMETERS
     if (ap->sample_rate > 0)
         s->sample_rate = ap->sample_rate;
 
     if (ap->channels > 0)
         s->channels = ap->channels;
+#endif
 
     st = av_new_stream(s1, 0);
     if (!st) {
diff --git a/libavdevice/oss_audio.c b/libavdevice/oss_audio.c
index bcd6540021..af46ea890b 100644
--- a/libavdevice/oss_audio.c
+++ b/libavdevice/oss_audio.c
@@ -218,10 +218,12 @@ static int audio_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     AVStream *st;
     int ret;
 
+#if FF_API_FORMAT_PARAMETERS
     if (ap->sample_rate > 0)
         s->sample_rate = ap->sample_rate;
     if (ap->channels > 0)
         s->channels = ap->channels;
+#endif
 
     st = av_new_stream(s1, 0);
     if (!st) {
diff --git a/libavdevice/sndio_dec.c b/libavdevice/sndio_dec.c
index abe13e1359..2690ee395c 100644
--- a/libavdevice/sndio_dec.c
+++ b/libavdevice/sndio_dec.c
@@ -34,10 +34,12 @@ static av_cold int audio_read_header(AVFormatContext *s1,
     AVStream *st;
     int ret;
 
+#if FF_API_FORMAT_PARAMETERS
     if (ap->sample_rate > 0)
         s->sample_rate = ap->sample_rate;
     if (ap->channels > 0)
         s->channels = ap->channels;
+#endif
 
     st = av_new_stream(s1, 0);
     if (!st)
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 35e6c15f81..64bbd22396 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -228,8 +228,10 @@ typedef struct AVProbeData {
 
 typedef struct AVFormatParameters {
     AVRational time_base;
-    int sample_rate;
-    int channels;
+#if FF_API_FORMAT_PARAMETERS
+    attribute_deprecated int sample_rate;
+    attribute_deprecated int channels;
+#endif
     int width;
     int height;
     enum PixelFormat pix_fmt;
diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index 81a6459307..b545dbd6d7 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -47,11 +47,13 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
         case AVMEDIA_TYPE_AUDIO: {
             RawAudioDemuxerContext *s1 = s->priv_data;
 
+#if FF_API_FORMAT_PARAMETERS
             if (ap->sample_rate)
                 st->codec->sample_rate = ap->sample_rate;
             if (ap->channels)
                 st->codec->channels    = ap->channels;
             else st->codec->channels   = 1;
+#endif
 
             if (s1->sample_rate)
                 st->codec->sample_rate = s1->sample_rate;

From e199eb44fdccd06db7d2c373bc0324351543ded0 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 20:45:36 +0200
Subject: [PATCH 275/830] bktr: add a private option for video standard.

---
 libavdevice/bktr.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index dad5c834f1..af184b98d2 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -25,6 +25,8 @@
  */
 
 #include "libavformat/avformat.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
 #if HAVE_DEV_BKTR_IOCTL_METEOR_H && HAVE_DEV_BKTR_IOCTL_BT848_H
 # include <dev/bktr/ioctl_meteor.h>
 # include <dev/bktr/ioctl_bt848.h>
@@ -47,12 +49,14 @@
 #include <strings.h>
 
 typedef struct {
+    AVClass *class;
     int video_fd;
     int tuner_fd;
     int width, height;
     int frame_rate;
     int frame_rate_base;
     uint64_t per_frame;
+    int standard;
 } VideoData;
 
 
@@ -245,7 +249,6 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     int width, height;
     int frame_rate;
     int frame_rate_base;
-    int format = -1;
 
     if (ap->width <= 0 || ap->height <= 0 || ap->time_base.den <= 0)
         return -1;
@@ -276,14 +279,14 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 
     if (ap->standard) {
         if (!strcasecmp(ap->standard, "pal"))
-            format = PAL;
+            s->standard = PAL;
         else if (!strcasecmp(ap->standard, "secam"))
-            format = SECAM;
+            s->standard = SECAM;
         else if (!strcasecmp(ap->standard, "ntsc"))
-            format = NTSC;
+            s->standard = NTSC;
     }
 
-    if (bktr_init(s1->filename, width, height, format,
+    if (bktr_init(s1->filename, width, height, s->standard,
             &(s->video_fd), &(s->tuner_fd), -1, 0.0) < 0)
         return AVERROR(EIO);
 
@@ -311,6 +314,24 @@ static int grab_read_close(AVFormatContext *s1)
     return 0;
 }
 
+static const AVOption options[] = { /* private options of the bktr grabber, stored in VideoData */
+    { "standard", "", offsetof(VideoData, standard), FF_OPT_TYPE_INT, {.dbl = VIDEO_FORMAT}, PAL, NTSCJ, AV_OPT_FLAG_DECODING_PARAM, "standard" }, /* defaults to VIDEO_FORMAT; grab_read_header() hands the value to bktr_init() */
+    { "PAL",      "", 0, FF_OPT_TYPE_CONST, {.dbl = PAL},   0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, /* named constants of the "standard" unit follow */
+    { "NTSC",     "", 0, FF_OPT_TYPE_CONST, {.dbl = NTSC},  0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { "SECAM",    "", 0, FF_OPT_TYPE_CONST, {.dbl = SECAM}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { "PALN",     "", 0, FF_OPT_TYPE_CONST, {.dbl = PALN},  0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { "PALM",     "", 0, FF_OPT_TYPE_CONST, {.dbl = PALM},  0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { "NTSCJ",    "", 0, FF_OPT_TYPE_CONST, {.dbl = NTSCJ}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { NULL }, /* table terminator */
+};
+
+static const AVClass bktr_class = { /* installed as .priv_class of ff_bktr_demuxer */
+    .class_name = "BKTR grab interface",
+    .item_name  = av_default_item_name,
+    .option     = options, /* the "standard" option table defined above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_bktr_demuxer = {
     "bktr",
     NULL_IF_CONFIG_SMALL("video grab"),
@@ -320,4 +341,5 @@ AVInputFormat ff_bktr_demuxer = {
     grab_read_packet,
     grab_read_close,
     .flags = AVFMT_NOFILE,
+    .priv_class = &bktr_class,
 };

From eb040dbbd39898f03e5057a21b0981a8ffe9f37b Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 20:58:07 +0200
Subject: [PATCH 276/830] dv1394: add a private option for video standard.

---
 libavdevice/dv1394.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/libavdevice/dv1394.c b/libavdevice/dv1394.c
index 70f928ed80..d0760ef269 100644
--- a/libavdevice/dv1394.c
+++ b/libavdevice/dv1394.c
@@ -30,6 +30,8 @@
 #include <time.h>
 #include <strings.h>
 
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
 #include "libavformat/avformat.h"
 
 #undef DV1394_DEBUG
@@ -38,6 +40,7 @@
 #include "dv1394.h"
 
 struct dv1394_data {
+    AVClass *class;
     int fd;
     int channel;
     int format;
@@ -90,10 +93,12 @@ static int dv1394_read_header(AVFormatContext * context, AVFormatParameters * ap
     if (!dv->dv_demux)
         goto failed;
 
-    if (ap->standard && !strcasecmp(ap->standard, "pal"))
-        dv->format = DV1394_PAL;
-    else
-        dv->format = DV1394_NTSC;
+    if (ap->standard) {
+       if (!strcasecmp(ap->standard, "pal"))
+           dv->format = DV1394_PAL;
+       else
+           dv->format = DV1394_NTSC;
+    }
 
     if (ap->channel)
         dv->channel = ap->channel;
@@ -227,6 +232,20 @@ static int dv1394_close(AVFormatContext * context)
     return 0;
 }
 
+static const AVOption options[] = {
+    { "standard", "", offsetof(struct dv1394_data, format), FF_OPT_TYPE_INT, {.dbl = DV1394_NTSC}, DV1394_PAL, DV1394_NTSC, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { "PAL",      "", 0, FF_OPT_TYPE_CONST, {.dbl = DV1394_PAL},   0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { "NTSC",     "", 0, FF_OPT_TYPE_CONST, {.dbl = DV1394_NTSC},  0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { NULL },
+};
+
+static const AVClass dv1394_class = { /* installed as .priv_class of ff_dv1394_demuxer */
+    .class_name = "DV1394 indev",
+    .item_name  = av_default_item_name,
+    .option     = options, /* option table defined above; entries write into struct dv1394_data */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_dv1394_demuxer = {
     .name           = "dv1394",
     .long_name      = NULL_IF_CONFIG_SMALL("DV1394 A/V grab"),
@@ -234,5 +253,6 @@ AVInputFormat ff_dv1394_demuxer = {
     .read_header    = dv1394_read_header,
     .read_packet    = dv1394_read_packet,
     .read_close     = dv1394_close,
-    .flags          = AVFMT_NOFILE
+    .flags          = AVFMT_NOFILE,
+    .priv_class     = &dv1394_class,
 };

From a861ffeffa40dfc4f3d20c221c1accca99db23df Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 21:21:59 +0200
Subject: [PATCH 277/830] v4l: add a private option for video standard.

---
 libavdevice/v4l.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/libavdevice/v4l.c b/libavdevice/v4l.c
index d97282b7e7..8c1134536a 100644
--- a/libavdevice/v4l.c
+++ b/libavdevice/v4l.c
@@ -23,6 +23,8 @@
 #include "config.h"
 #include "libavutil/rational.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
 #include "libavformat/avformat.h"
 #include "libavcodec/dsputil.h"
 #include <unistd.h>
@@ -36,6 +38,7 @@
 #include <strings.h>
 
 typedef struct {
+    AVClass *class;
     int fd;
     int frame_format; /* see VIDEO_PALETTE_xxx */
     int use_mmap;
@@ -49,6 +52,7 @@ typedef struct {
     struct video_mbuf gb_buffers;
     struct video_mmap gb_buf;
     int gb_frame;
+    int standard;
 } VideoData;
 
 static const struct {
@@ -131,13 +135,16 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     }
 
     /* set tv standard */
-    if (ap->standard && !ioctl(video_fd, VIDIOCGTUNER, &tuner)) {
-        if (!strcasecmp(ap->standard, "pal"))
-            tuner.mode = VIDEO_MODE_PAL;
-        else if (!strcasecmp(ap->standard, "secam"))
-            tuner.mode = VIDEO_MODE_SECAM;
-        else
-            tuner.mode = VIDEO_MODE_NTSC;
+    if (!ioctl(video_fd, VIDIOCGTUNER, &tuner)) {
+        if (ap->standard) {
+            if (!strcasecmp(ap->standard, "pal"))
+                s->standard = VIDEO_MODE_PAL;
+            else if (!strcasecmp(ap->standard, "secam"))
+                s->standard = VIDEO_MODE_SECAM;
+            else
+                s->standard = VIDEO_MODE_NTSC;
+        }
+        tuner.mode = s->standard;
         ioctl(video_fd, VIDIOCSTUNER, &tuner);
     }
 
@@ -333,6 +340,21 @@ static int grab_read_close(AVFormatContext *s1)
     return 0;
 }
 
+static const AVOption options[] = { /* private options of the V4L grabber, stored in VideoData */
+    { "standard", "", offsetof(VideoData, standard), FF_OPT_TYPE_INT, {.dbl = VIDEO_MODE_NTSC}, VIDEO_MODE_PAL, VIDEO_MODE_SECAM, AV_OPT_FLAG_DECODING_PARAM, "standard" }, /* range must admit every constant below; assumes PAL < NTSC < SECAM as in <linux/videodev.h> */
+    { "PAL",   "", 0, FF_OPT_TYPE_CONST, {.dbl = VIDEO_MODE_PAL},   0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { "SECAM", "", 0, FF_OPT_TYPE_CONST, {.dbl = VIDEO_MODE_SECAM}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { "NTSC",  "", 0, FF_OPT_TYPE_CONST, {.dbl = VIDEO_MODE_NTSC},  0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { NULL }, /* table terminator */
+};
+
+static const AVClass v4l_class = { /* installed as .priv_class of ff_v4l_demuxer */
+    .class_name = "V4L indev",
+    .item_name  = av_default_item_name,
+    .option     = options, /* the "standard" option table defined above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_v4l_demuxer = {
     "video4linux",
     NULL_IF_CONFIG_SMALL("Video4Linux device grab"),
@@ -342,4 +364,5 @@ AVInputFormat ff_v4l_demuxer = {
     grab_read_packet,
     grab_read_close,
     .flags = AVFMT_NOFILE,
+    .priv_class = &v4l_class,
 };

From b3da2692115ea17190544883d15efa36219da99e Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 21:21:59 +0200
Subject: [PATCH 278/830] v4l2: add a private option for video standard.

---
 libavdevice/v4l2.c | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 8e0a6e64db..dca31a8140 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -44,6 +44,8 @@
 #include <time.h>
 #include <strings.h>
 #include "libavutil/imgutils.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
 
 static const int desired_video_buffers = 256;
 
@@ -54,6 +56,7 @@ enum io_method {
 };
 
 struct video_data {
+    AVClass *class;
     int fd;
     int frame_format; /* V4L2_PIX_FMT_* */
     enum io_method io_method;
@@ -64,6 +67,7 @@ struct video_data {
     int buffers;
     void **buf_start;
     unsigned int *buf_len;
+    char *standard;
 };
 
 struct buff_data {
@@ -467,31 +471,37 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
     }
 
     if (ap->standard) {
+        av_freep(&s->standard);
+        s->standard = av_strdup(ap->standard);
+    }
+
+    if (s->standard) {
         av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set standard: %s\n",
-               ap->standard);
+               s->standard);
         /* set tv standard */
         memset (&standard, 0, sizeof (standard));
         for(i=0;;i++) {
             standard.index = i;
             if (ioctl(s->fd, VIDIOC_ENUMSTD, &standard) < 0) {
                 av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set standard(%s) failed\n",
-                       ap->standard);
+                       s->standard);
                 return AVERROR(EIO);
             }
 
-            if (!strcasecmp(standard.name, ap->standard)) {
+            if (!strcasecmp(standard.name, s->standard)) {
                 break;
             }
         }
 
         av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set standard: %s, id: %"PRIu64"\n",
-               ap->standard, (uint64_t)standard.id);
+               s->standard, (uint64_t)standard.id);
         if (ioctl(s->fd, VIDIOC_S_STD, &standard.id) < 0) {
             av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set standard(%s) failed\n",
-                   ap->standard);
+                   s->standard);
             return AVERROR(EIO);
         }
     }
+    av_freep(&s->standard);
 
     if (ap->time_base.num && ap->time_base.den) {
         av_log(s1, AV_LOG_DEBUG, "Setting time per frame to %d/%d\n",
@@ -680,6 +690,18 @@ static int v4l2_read_close(AVFormatContext *s1)
     return 0;
 }
 
+static const AVOption options[] = {
+    { "standard", "", offsetof(struct video_data, standard), FF_OPT_TYPE_STRING, {.str = "NTSC" }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+static const AVClass v4l2_class = { /* installed as .priv_class of ff_v4l2_demuxer */
+    .class_name = "V4L2 indev",
+    .item_name  = av_default_item_name,
+    .option     = options, /* option table defined above; entries write into struct video_data */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_v4l2_demuxer = {
     "video4linux2",
     NULL_IF_CONFIG_SMALL("Video4Linux2 device grab"),
@@ -689,4 +711,5 @@ AVInputFormat ff_v4l2_demuxer = {
     v4l2_read_packet,
     v4l2_read_close,
     .flags = AVFMT_NOFILE,
+    .priv_class = &v4l2_class,
 };

From fc68a8f7030227fc4fa8d83b9051aaf598cd12dd Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 21:40:44 +0200
Subject: [PATCH 279/830] lavf: deprecate AVFormatParameters.standard.

---
 libavdevice/bktr.c     | 2 ++
 libavdevice/dv1394.c   | 2 ++
 libavdevice/v4l.c      | 2 ++
 libavdevice/v4l2.c     | 2 ++
 libavformat/avformat.h | 2 +-
 5 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index af184b98d2..821567199e 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -277,6 +277,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     st->codec->time_base.den = frame_rate;
     st->codec->time_base.num = frame_rate_base;
 
+#if FF_API_FORMAT_PARAMETERS
     if (ap->standard) {
         if (!strcasecmp(ap->standard, "pal"))
             s->standard = PAL;
@@ -285,6 +286,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         else if (!strcasecmp(ap->standard, "ntsc"))
             s->standard = NTSC;
     }
+#endif
 
     if (bktr_init(s1->filename, width, height, s->standard,
             &(s->video_fd), &(s->tuner_fd), -1, 0.0) < 0)
diff --git a/libavdevice/dv1394.c b/libavdevice/dv1394.c
index d0760ef269..2515f78c8f 100644
--- a/libavdevice/dv1394.c
+++ b/libavdevice/dv1394.c
@@ -93,12 +93,14 @@ static int dv1394_read_header(AVFormatContext * context, AVFormatParameters * ap
     if (!dv->dv_demux)
         goto failed;
 
+#if FF_API_FORMAT_PARAMETERS
     if (ap->standard) {
        if (!strcasecmp(ap->standard, "pal"))
            dv->format = DV1394_PAL;
        else
            dv->format = DV1394_NTSC;
     }
+#endif
 
     if (ap->channel)
         dv->channel = ap->channel;
diff --git a/libavdevice/v4l.c b/libavdevice/v4l.c
index 8c1134536a..54d0394ff5 100644
--- a/libavdevice/v4l.c
+++ b/libavdevice/v4l.c
@@ -136,6 +136,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 
     /* set tv standard */
     if (!ioctl(video_fd, VIDIOCGTUNER, &tuner)) {
+#if FF_API_FORMAT_PARAMETERS
         if (ap->standard) {
             if (!strcasecmp(ap->standard, "pal"))
                 s->standard = VIDEO_MODE_PAL;
@@ -144,6 +145,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
             else
                 s->standard = VIDEO_MODE_NTSC;
         }
+#endif
         tuner.mode = s->standard;
         ioctl(video_fd, VIDIOCSTUNER, &tuner);
     }
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index dca31a8140..1c3059d850 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -470,10 +470,12 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
         }
     }
 
+#if FF_API_FORMAT_PARAMETERS
     if (ap->standard) {
         av_freep(&s->standard);
         s->standard = av_strdup(ap->standard);
     }
+#endif
 
     if (s->standard) {
         av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set standard: %s\n",
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 64bbd22396..424fc920b4 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -236,8 +236,8 @@ typedef struct AVFormatParameters {
     int height;
     enum PixelFormat pix_fmt;
     int channel; /**< Used to select DV channel. */
-    const char *standard; /**< TV standard, NTSC, PAL, SECAM */
 #if FF_API_FORMAT_PARAMETERS
+    attribute_deprecated const char *standard; /**< deprecated, use demuxer-specific options instead. */
     attribute_deprecated unsigned int mpeg2ts_raw:1;  /**< deprecated, use mpegtsraw demuxer */
     /**< deprecated, use mpegtsraw demuxer-specific options instead */
     attribute_deprecated unsigned int mpeg2ts_compute_pcr:1;

From a02fd06ab76c0abd7ef32f332a08177e6014b3a8 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 21:53:44 +0200
Subject: [PATCH 280/830] v4l2: add a private option for channel.

---
 libavdevice/v4l2.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 1c3059d850..59af2c83ec 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -68,6 +68,7 @@ struct video_data {
     void **buf_start;
     unsigned int *buf_len;
     char *standard;
+    int channel;
 };
 
 struct buff_data {
@@ -452,23 +453,24 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
 
     streamparm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 
-    if (ap->channel>=0) {
+    if (ap->channel > 0)
+        s->channel = ap->channel;
+
         /* set tv video input */
         memset (&input, 0, sizeof (input));
-        input.index = ap->channel;
+        input.index = s->channel;
         if (ioctl(s->fd, VIDIOC_ENUMINPUT, &input) < 0) {
             av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl enum input failed:\n");
             return AVERROR(EIO);
         }
 
         av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set input_id: %d, input: %s\n",
-               ap->channel, input.name);
+               s->channel, input.name);
         if (ioctl(s->fd, VIDIOC_S_INPUT, &input.index) < 0) {
             av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set input(%d) failed\n",
-                   ap->channel);
+                   s->channel);
             return AVERROR(EIO);
         }
-    }
 
 #if FF_API_FORMAT_PARAMETERS
     if (ap->standard) {
@@ -694,6 +696,7 @@ static int v4l2_read_close(AVFormatContext *s1)
 
 static const AVOption options[] = {
     { "standard", "", offsetof(struct video_data, standard), FF_OPT_TYPE_STRING, {.str = "NTSC" }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
+    { "channel",  "", offsetof(struct video_data, channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { NULL },
 };
 

From 3d2a418605b23b475b4217a93e8e60660154e198 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 07:23:29 +0200
Subject: [PATCH 281/830] v4l2: reindent.

---
 libavdevice/v4l2.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 59af2c83ec..0385a2c6c0 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -456,21 +456,21 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
     if (ap->channel > 0)
         s->channel = ap->channel;
 
-        /* set tv video input */
-        memset (&input, 0, sizeof (input));
-        input.index = s->channel;
-        if (ioctl(s->fd, VIDIOC_ENUMINPUT, &input) < 0) {
-            av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl enum input failed:\n");
-            return AVERROR(EIO);
-        }
+    /* set tv video input */
+    memset (&input, 0, sizeof (input));
+    input.index = s->channel;
+    if (ioctl(s->fd, VIDIOC_ENUMINPUT, &input) < 0) {
+        av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl enum input failed:\n");
+        return AVERROR(EIO);
+    }
 
-        av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set input_id: %d, input: %s\n",
-               s->channel, input.name);
-        if (ioctl(s->fd, VIDIOC_S_INPUT, &input.index) < 0) {
-            av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set input(%d) failed\n",
-                   s->channel);
-            return AVERROR(EIO);
-        }
+    av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set input_id: %d, input: %s\n",
+            s->channel, input.name);
+    if (ioctl(s->fd, VIDIOC_S_INPUT, &input.index) < 0) {
+        av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set input(%d) failed\n",
+                s->channel);
+        return AVERROR(EIO);
+    }
 
 #if FF_API_FORMAT_PARAMETERS
     if (ap->standard) {

From 986f4f491840d7c440ef4ccde7adcef219be26f3 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 21:55:44 +0200
Subject: [PATCH 282/830] dv1394: add a private option for channel.

---
 libavdevice/dv1394.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libavdevice/dv1394.c b/libavdevice/dv1394.c
index 2515f78c8f..0981eff53c 100644
--- a/libavdevice/dv1394.c
+++ b/libavdevice/dv1394.c
@@ -104,8 +104,6 @@ static int dv1394_read_header(AVFormatContext * context, AVFormatParameters * ap
 
     if (ap->channel)
         dv->channel = ap->channel;
-    else
-        dv->channel = DV1394_DEFAULT_CHANNEL;
 
     /* Open and initialize DV1394 device */
     dv->fd = open(context->filename, O_RDONLY);
@@ -238,6 +236,7 @@ static const AVOption options[] = {
     { "standard", "", offsetof(struct dv1394_data, format), FF_OPT_TYPE_INT, {.dbl = DV1394_NTSC}, DV1394_PAL, DV1394_NTSC, AV_OPT_FLAG_DECODING_PARAM, "standard" },
     { "PAL",      "", 0, FF_OPT_TYPE_CONST, {.dbl = DV1394_PAL},   0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
     { "NTSC",     "", 0, FF_OPT_TYPE_CONST, {.dbl = DV1394_NTSC},  0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { "channel",  "", offsetof(struct dv1394_data, channel), FF_OPT_TYPE_INT, {.dbl = DV1394_DEFAULT_CHANNEL}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { NULL },
 };
 

From 79405e57dc1a1e2746c87cba677052d742dbb7ee Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 22:01:36 +0200
Subject: [PATCH 283/830] libdc1394: add a private option for channel.

---
 libavdevice/libdc1394.c | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index e637af5f74..a8406b8c2e 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -22,6 +22,8 @@
 
 #include "config.h"
 #include "libavformat/avformat.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
 
 #if HAVE_LIBDC1394_2
 #include <dc1394/dc1394.h>
@@ -45,9 +47,11 @@
 #undef free
 
 typedef struct dc1394_data {
+    AVClass *class;
 #if HAVE_LIBDC1394_1
     raw1394handle_t handle;
     dc1394_cameracapture camera;
+    int channel;
 #elif HAVE_LIBDC1394_2
     dc1394_t *d;
     dc1394camera_t *camera;
@@ -155,6 +159,9 @@ static int dc1394_v1_read_header(AVFormatContext *c, AVFormatParameters * ap)
     if (dc1394_read_common(c,ap,&fmt,&fps) != 0)
         return -1;
 
+    if (ap->channel)
+        dc1394->channel = ap->channel;
+
     /* Now let us prep the hardware. */
     dc1394->handle = dc1394_create_handle(0); /* FIXME: gotta have ap->port */
     if (!dc1394->handle) {
@@ -162,11 +169,11 @@ static int dc1394_v1_read_header(AVFormatContext *c, AVFormatParameters * ap)
         goto out;
     }
     camera_nodes = dc1394_get_camera_nodes(dc1394->handle, &res, 1);
-    if (!camera_nodes || camera_nodes[ap->channel] == DC1394_NO_CAMERA) {
-        av_log(c, AV_LOG_ERROR, "There's no IIDC camera on the channel %d\n", ap->channel);
+    if (!camera_nodes || camera_nodes[dc1394->channel] == DC1394_NO_CAMERA) {
+        av_log(c, AV_LOG_ERROR, "There's no IIDC camera on the channel %d\n", dc1394->channel);
         goto out_handle;
     }
-    res = dc1394_dma_setup_capture(dc1394->handle, camera_nodes[ap->channel],
+    res = dc1394_dma_setup_capture(dc1394->handle, camera_nodes[dc1394->channel],
                                    0,
                                    FORMAT_VGA_NONCOMPRESSED,
                                    fmt->frame_size_id,
@@ -236,6 +243,20 @@ static int dc1394_v1_close(AVFormatContext * context)
     return 0;
 }
 
+static const AVOption options[] = { /* private options of the libdc1394 grabber */
+#if HAVE_LIBDC1394_1
+    { "channel", "", offsetof(dc1394_data, channel), FF_OPT_TYPE_INT, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, /* used by dc1394_v1_read_header() as index into camera_nodes[] */
+#endif
+    { NULL }, /* table terminator; the only entry when HAVE_LIBDC1394_1 is not set */
+};
+
+static const AVClass libdc1394_class = { /* referenced as .priv_class by both ff_libdc1394_demuxer definitions */
+    .class_name = "libdc1394 indev",
+    .item_name  = av_default_item_name,
+    .option     = options, /* option table defined above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 #elif HAVE_LIBDC1394_2
 static int dc1394_v2_read_header(AVFormatContext *c, AVFormatParameters * ap)
 {
@@ -356,6 +377,7 @@ AVInputFormat ff_libdc1394_demuxer = {
     .read_packet    = dc1394_v2_read_packet,
     .read_close     = dc1394_v2_close,
     .flags          = AVFMT_NOFILE
+    .priv_class     = &libdc1394_class,
 };
 
 #endif
@@ -367,6 +389,7 @@ AVInputFormat ff_libdc1394_demuxer = {
     .read_header    = dc1394_v1_read_header,
     .read_packet    = dc1394_v1_read_packet,
     .read_close     = dc1394_v1_close,
-    .flags          = AVFMT_NOFILE
+    .flags          = AVFMT_NOFILE,
+    .priv_class     = &libdc1394_class,
 };
 #endif

From d20576d01b6489e37813302c208df01068418bfb Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 22:06:09 +0200
Subject: [PATCH 284/830] lavf: deprecate AVFormatParameters.channel.

---
 libavdevice/dv1394.c    | 2 +-
 libavdevice/libdc1394.c | 2 ++
 libavdevice/v4l2.c      | 2 ++
 libavformat/avformat.h  | 2 +-
 4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavdevice/dv1394.c b/libavdevice/dv1394.c
index 0981eff53c..c9b7a69d6f 100644
--- a/libavdevice/dv1394.c
+++ b/libavdevice/dv1394.c
@@ -100,10 +100,10 @@ static int dv1394_read_header(AVFormatContext * context, AVFormatParameters * ap
        else
            dv->format = DV1394_NTSC;
     }
-#endif
 
     if (ap->channel)
         dv->channel = ap->channel;
+#endif
 
     /* Open and initialize DV1394 device */
     dv->fd = open(context->filename, O_RDONLY);
diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index a8406b8c2e..96e9e9b88b 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -159,8 +159,10 @@ static int dc1394_v1_read_header(AVFormatContext *c, AVFormatParameters * ap)
     if (dc1394_read_common(c,ap,&fmt,&fps) != 0)
         return -1;
 
+#if FF_API_FORMAT_PARAMETERS
     if (ap->channel)
         dc1394->channel = ap->channel;
+#endif
 
     /* Now let us prep the hardware. */
     dc1394->handle = dc1394_create_handle(0); /* FIXME: gotta have ap->port */
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 0385a2c6c0..566ee92801 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -453,8 +453,10 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
 
     streamparm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 
+#if FF_API_FORMAT_PARAMETERS
     if (ap->channel > 0)
         s->channel = ap->channel;
+#endif
 
     /* set tv video input */
     memset (&input, 0, sizeof (input));
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 424fc920b4..11dbe8b2de 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -235,8 +235,8 @@ typedef struct AVFormatParameters {
     int width;
     int height;
     enum PixelFormat pix_fmt;
-    int channel; /**< Used to select DV channel. */
 #if FF_API_FORMAT_PARAMETERS
+    attribute_deprecated int channel; /**< Used to select DV channel. */
     attribute_deprecated const char *standard; /**< deprecated, use demuxer-specific options instead. */
     attribute_deprecated unsigned int mpeg2ts_raw:1;  /**< deprecated, use mpegtsraw demuxer */
     /**< deprecated, use mpegtsraw demuxer-specific options instead */

From d2bc4da15becf40c54e37af045963e3d13db5d8d Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 22 May 2011 21:54:06 +0200
Subject: [PATCH 285/830] ffmpeg: purge redundant AVInputStream.index.

AVStream.index stores the same thing.
---
 ffmpeg.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 86732535c8..ddc48c2354 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -307,7 +307,6 @@ static int nb_output_streams_for_file[MAX_FILES] = { 0 };
 
 typedef struct AVInputStream {
     int file_index;
-    int index;
     AVStream *st;
     int discard;             /* true if stream data should be discarded */
     int decoding_needed;     /* true if the packets must be decoded in 'raw_fifo' */
@@ -802,7 +801,7 @@ need_realloc:
     if ((ost->audio_resample && !ost->resample) || resample_changed) {
         if (resample_changed) {
             av_log(NULL, AV_LOG_INFO, "Input stream #%d.%d frame changed from rate:%d fmt:%s ch:%d to rate:%d fmt:%s ch:%d\n",
-                   ist->file_index, ist->index,
+                   ist->file_index, ist->st->index,
                    ost->resample_sample_rate, av_get_sample_fmt_name(ost->resample_sample_fmt), ost->resample_channels,
                    dec->sample_rate, av_get_sample_fmt_name(dec->sample_fmt), dec->channels);
             ost->resample_sample_fmt  = dec->sample_fmt;
@@ -1165,7 +1164,7 @@ static void do_video_out(AVFormatContext *s,
     if (resample_changed) {
         av_log(NULL, AV_LOG_INFO,
                "Input stream #%d.%d frame changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s\n",
-               ist->file_index, ist->index,
+               ist->file_index, ist->st->index,
                ost->resample_width, ost->resample_height, avcodec_get_pix_fmt_name(ost->resample_pix_fmt),
                dec->width         , dec->height         , avcodec_get_pix_fmt_name(dec->pix_fmt));
         if(!ost->video_resample)
@@ -1974,7 +1973,6 @@ static int transcode(AVFormatContext **output_files,
             ist = ist_table[j++];
             ist->st = is->streams[k];
             ist->file_index = i;
-            ist->index = k;
             ist->discard = 1; /* the stream is discarded by default
                                  (changed later) */
 
@@ -2345,7 +2343,7 @@ static int transcode(AVFormatContext **output_files,
                 codec = avcodec_find_decoder(ist->st->codec->codec_id);
             if (!codec) {
                 snprintf(error, sizeof(error), "Decoder (codec id %d) not found for input stream #%d.%d",
-                        ist->st->codec->codec_id, ist->file_index, ist->index);
+                        ist->st->codec->codec_id, ist->file_index, ist->st->index);
                 ret = AVERROR(EINVAL);
                 goto dump_format;
             }
@@ -2362,7 +2360,7 @@ static int transcode(AVFormatContext **output_files,
 
             if (avcodec_open(ist->st->codec, codec) < 0) {
                 snprintf(error, sizeof(error), "Error while opening decoder for input stream #%d.%d",
-                        ist->file_index, ist->index);
+                        ist->file_index, ist->st->index);
                 ret = AVERROR(EINVAL);
                 goto dump_format;
             }
@@ -2498,13 +2496,13 @@ static int transcode(AVFormatContext **output_files,
             ost = ost_table[i];
             fprintf(stderr, "  Stream #%d.%d -> #%d.%d",
                     ist_table[ost->source_index]->file_index,
-                    ist_table[ost->source_index]->index,
+                    ist_table[ost->source_index]->st->index,
                     ost->file_index,
                     ost->index);
             if (ost->sync_ist != ist_table[ost->source_index])
                 fprintf(stderr, " [sync #%d.%d]",
                         ost->sync_ist->file_index,
-                        ost->sync_ist->index);
+                        ost->sync_ist->st->index);
             fprintf(stderr, "\n");
         }
     }
@@ -2643,12 +2641,12 @@ static int transcode(AVFormatContext **output_files,
             goto discard_packet;
         }
 
-        //fprintf(stderr,"read #%d.%d size=%d\n", ist->file_index, ist->index, pkt.size);
+        //fprintf(stderr,"read #%d.%d size=%d\n", ist->file_index, ist->st->index, pkt.size);
         if (output_packet(ist, ist_index, ost_table, nb_ostreams, &pkt) < 0) {
 
             if (verbose >= 0)
                 fprintf(stderr, "Error while decoding stream #%d.%d\n",
-                        ist->file_index, ist->index);
+                        ist->file_index, ist->st->index);
             if (exit_on_error)
                 ffmpeg_exit(1);
             av_free_packet(&pkt);

From 07633154add3cf59f281ba0c9eb689df4284e2cb Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 22 May 2011 22:12:35 +0200
Subject: [PATCH 286/830] ffmpeg: simplify managing input files and streams

Grow the file and stream list in opt_input_file() instead of creating it
all at once in transcode().  This is simpler and will be useful for
following commits.
---
 ffmpeg.c | 163 +++++++++++++++++++++++--------------------------------
 1 file changed, 68 insertions(+), 95 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index ddc48c2354..17e95de8cd 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -105,11 +105,9 @@ static const OptionDef options[];
 #define FFM_PACKET_SIZE 4096 //XXX a duplicate of the line in ffm.h
 
 static const char *last_asked_format = NULL;
-static AVFormatContext *input_files[MAX_FILES];
 static int64_t input_files_ts_offset[MAX_FILES];
 static double *input_files_ts_scale[MAX_FILES] = {NULL};
 static AVCodec **input_codecs = NULL;
-static int nb_input_files = 0;
 static int nb_input_codecs = 0;
 static int nb_input_files_ts_scale[MAX_FILES] = {0};
 
@@ -327,12 +325,18 @@ typedef struct AVInputStream {
 } AVInputStream;
 
 typedef struct AVInputFile {
+    AVFormatContext *ctx;
     int eof_reached;      /* true if eof reached */
     int ist_index;        /* index of first stream in ist_table */
     int buffer_size;      /* current total buffer size */
     int nb_streams;       /* nb streams we are aware of */
 } AVInputFile;
 
+static AVInputStream *input_streams = NULL;
+static int         nb_input_streams = 0;
+static AVInputFile   *input_files   = NULL;
+static int         nb_input_files   = 0;
+
 #if CONFIG_AVFILTER
 
 static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost)
@@ -462,7 +466,7 @@ static int ffmpeg_exit(int ret)
         av_free(output_streams_for_file[i]);
     }
     for(i=0;i<nb_input_files;i++) {
-        av_close_input_file(input_files[i]);
+        av_close_input_file(input_files[i].ctx);
         av_free(input_files_ts_scale[i]);
     }
 
@@ -479,6 +483,9 @@ static int ffmpeg_exit(int ret)
     av_free(stream_maps);
     av_free(meta_data_maps);
 
+    av_freep(&input_streams);
+    av_freep(&input_files);
+
     av_free(video_codec_name);
     av_free(audio_codec_name);
     av_free(subtitle_codec_name);
@@ -1861,7 +1868,7 @@ static void print_sdp(AVFormatContext **avc, int n)
 
 static int copy_chapters(int infile, int outfile)
 {
-    AVFormatContext *is = input_files[infile];
+    AVFormatContext *is = input_files[infile].ctx;
     AVFormatContext *os = output_files[outfile];
     int i;
 
@@ -1927,60 +1934,23 @@ static void parse_forced_key_frames(char *kf, AVOutputStream *ost,
  */
 static int transcode(AVFormatContext **output_files,
                      int nb_output_files,
-                     AVFormatContext **input_files,
+                     AVInputFile *input_files,
                      int nb_input_files,
                      AVStreamMap *stream_maps, int nb_stream_maps)
 {
-    int ret = 0, i, j, k, n, nb_istreams = 0, nb_ostreams = 0;
+    int ret = 0, i, j, k, n, nb_ostreams = 0;
     AVFormatContext *is, *os;
     AVCodecContext *codec, *icodec;
     AVOutputStream *ost, **ost_table = NULL;
-    AVInputStream *ist, **ist_table = NULL;
-    AVInputFile *file_table;
+    AVInputStream *ist;
     char error[1024];
     int want_sdp = 1;
     uint8_t no_packet[MAX_FILES]={0};
     int no_packet_count=0;
 
-    file_table= av_mallocz(nb_input_files * sizeof(AVInputFile));
-    if (!file_table)
-        goto fail;
-
-    /* input stream init */
-    j = 0;
-    for(i=0;i<nb_input_files;i++) {
-        is = input_files[i];
-        file_table[i].ist_index = j;
-        file_table[i].nb_streams = is->nb_streams;
-        j += is->nb_streams;
-    }
-    nb_istreams = j;
-
-    ist_table = av_mallocz(nb_istreams * sizeof(AVInputStream *));
-    if (!ist_table)
-        goto fail;
-
-    for(i=0;i<nb_istreams;i++) {
-        ist = av_mallocz(sizeof(AVInputStream));
-        if (!ist)
-            goto fail;
-        ist_table[i] = ist;
-    }
-    j = 0;
-    for(i=0;i<nb_input_files;i++) {
-        is = input_files[i];
-        for(k=0;k<is->nb_streams;k++) {
-            ist = ist_table[j++];
-            ist->st = is->streams[k];
-            ist->file_index = i;
-            ist->discard = 1; /* the stream is discarded by default
-                                 (changed later) */
-
-            if (rate_emu) {
-                ist->start = av_gettime();
-            }
-        }
-    }
+    if (rate_emu)
+        for (i = 0; i < nb_input_streams; i++)
+            input_streams[i].start = av_gettime();
 
     /* output stream init */
     nb_ostreams = 0;
@@ -2006,7 +1976,7 @@ static int transcode(AVFormatContext **output_files,
         int si = stream_maps[i].stream_index;
 
         if (fi < 0 || fi > nb_input_files - 1 ||
-            si < 0 || si > file_table[fi].nb_streams - 1) {
+            si < 0 || si > input_files[fi].nb_streams - 1) {
             fprintf(stderr,"Could not find input stream #%d.%d\n", fi, si);
             ret = AVERROR(EINVAL);
             goto fail;
@@ -2014,7 +1984,7 @@ static int transcode(AVFormatContext **output_files,
         fi = stream_maps[i].sync_file_index;
         si = stream_maps[i].sync_stream_index;
         if (fi < 0 || fi > nb_input_files - 1 ||
-            si < 0 || si > file_table[fi].nb_streams - 1) {
+            si < 0 || si > input_files[fi].nb_streams - 1) {
             fprintf(stderr,"Could not find sync stream #%d.%d\n", fi, si);
             ret = AVERROR(EINVAL);
             goto fail;
@@ -2032,11 +2002,11 @@ static int transcode(AVFormatContext **output_files,
             ost = ost_table[n] = output_streams_for_file[k][i];
             ost->st = os->streams[i];
             if (nb_stream_maps > 0) {
-                ost->source_index = file_table[stream_maps[n].file_index].ist_index +
+                ost->source_index = input_files[stream_maps[n].file_index].ist_index +
                     stream_maps[n].stream_index;
 
                 /* Sanity check that the stream types match */
-                if (ist_table[ost->source_index]->st->codec->codec_type != ost->st->codec->codec_type) {
+                if (input_streams[ost->source_index].st->codec->codec_type != ost->st->codec->codec_type) {
                     int i= ost->file_index;
                     av_dump_format(output_files[i], i, output_files[i]->filename, 1);
                     fprintf(stderr, "Codec type mismatch for mapping #%d.%d -> #%d.%d\n",
@@ -2049,12 +2019,12 @@ static int transcode(AVFormatContext **output_files,
                 int best_nb_frames=-1;
                 /* get corresponding input stream index : we select the first one with the right type */
                 found = 0;
-                for(j=0;j<nb_istreams;j++) {
+                for (j = 0; j < nb_input_streams; j++) {
                     int skip=0;
-                    ist = ist_table[j];
+                    ist = &input_streams[j];
                     if(opt_programid){
                         int pi,si;
-                        AVFormatContext *f= input_files[ ist->file_index ];
+                        AVFormatContext *f = input_files[ist->file_index].ctx;
                         skip=1;
                         for(pi=0; pi<f->nb_programs; pi++){
                             AVProgram *p= f->programs[pi];
@@ -2078,8 +2048,8 @@ static int transcode(AVFormatContext **output_files,
                 if (!found) {
                     if(! opt_programid) {
                         /* try again and reuse existing stream */
-                        for(j=0;j<nb_istreams;j++) {
-                            ist = ist_table[j];
+                        for (j = 0; j < nb_input_streams; j++) {
+                            ist = &input_streams[j];
                             if (   ist->st->codec->codec_type == ost->st->codec->codec_type
                                 && ist->st->discard != AVDISCARD_ALL) {
                                 ost->source_index = j;
@@ -2096,10 +2066,10 @@ static int transcode(AVFormatContext **output_files,
                     }
                 }
             }
-            ist = ist_table[ost->source_index];
+            ist = &input_streams[ost->source_index];
             ist->discard = 0;
             ost->sync_ist = (nb_stream_maps > 0) ?
-                ist_table[file_table[stream_maps[n].sync_file_index].ist_index +
+                &input_streams[input_files[stream_maps[n].sync_file_index].ist_index +
                          stream_maps[n].sync_stream_index] : ist;
         }
     }
@@ -2108,7 +2078,7 @@ static int transcode(AVFormatContext **output_files,
     for(i=0;i<nb_ostreams;i++) {
         ost = ost_table[i];
         os = output_files[ost->file_index];
-        ist = ist_table[ost->source_index];
+        ist = &input_streams[ost->source_index];
 
         codec = ost->st->codec;
         icodec = ist->st->codec;
@@ -2306,7 +2276,7 @@ static int transcode(AVFormatContext **output_files,
         ost = ost_table[i];
         if (ost->encoding_needed) {
             AVCodec *codec = i < nb_output_codecs ? output_codecs[i] : NULL;
-            AVCodecContext *dec = ist_table[ost->source_index]->st->codec;
+            AVCodecContext *dec = input_streams[ost->source_index].st->codec;
             if (!codec)
                 codec = avcodec_find_encoder(ost->st->codec->codec_id);
             if (!codec) {
@@ -2335,8 +2305,8 @@ static int transcode(AVFormatContext **output_files,
     }
 
     /* open each decoder */
-    for(i=0;i<nb_istreams;i++) {
-        ist = ist_table[i];
+    for (i = 0; i < nb_input_streams; i++) {
+        ist = &input_streams[i];
         if (ist->decoding_needed) {
             AVCodec *codec = i < nb_input_codecs ? input_codecs[i] : NULL;
             if (!codec)
@@ -2370,9 +2340,9 @@ static int transcode(AVFormatContext **output_files,
     }
 
     /* init pts */
-    for(i=0;i<nb_istreams;i++) {
+    for (i = 0; i < nb_input_streams; i++) {
         AVStream *st;
-        ist = ist_table[i];
+        ist = &input_streams[i];
         st= ist->st;
         ist->pts = st->avg_frame_rate.num ? - st->codec->has_b_frames*AV_TIME_BASE / av_q2d(st->avg_frame_rate) : 0;
         ist->next_pts = AV_NOPTS_VALUE;
@@ -2402,7 +2372,7 @@ static int transcode(AVFormatContext **output_files,
         METADATA_CHECK_INDEX(in_file_index, nb_input_files, "input file")
 
         files[0] = output_files[out_file_index];
-        files[1] = input_files[in_file_index];
+        files[1] = input_files[in_file_index].ctx;
 
         for (j = 0; j < 2; j++) {
             AVMetaDataMap *map = &meta_data_maps[i][j];
@@ -2433,7 +2403,7 @@ static int transcode(AVFormatContext **output_files,
     if (metadata_global_autocopy) {
 
         for (i = 0; i < nb_output_files; i++)
-            av_metadata_copy(&output_files[i]->metadata, input_files[0]->metadata,
+            av_metadata_copy(&output_files[i]->metadata, input_files[0].ctx->metadata,
                              AV_METADATA_DONT_OVERWRITE);
     }
 
@@ -2460,7 +2430,7 @@ static int transcode(AVFormatContext **output_files,
     /* copy chapters from the first input file that has them*/
     if (!nb_chapter_maps)
         for (i = 0; i < nb_input_files; i++) {
-            if (!input_files[i]->nb_chapters)
+            if (!input_files[i].ctx->nb_chapters)
                 continue;
 
             for (j = 0; j < nb_output_files; j++)
@@ -2495,11 +2465,11 @@ static int transcode(AVFormatContext **output_files,
         for(i=0;i<nb_ostreams;i++) {
             ost = ost_table[i];
             fprintf(stderr, "  Stream #%d.%d -> #%d.%d",
-                    ist_table[ost->source_index]->file_index,
-                    ist_table[ost->source_index]->st->index,
+                    input_streams[ost->source_index].file_index,
+                    input_streams[ost->source_index].st->index,
                     ost->file_index,
                     ost->index);
-            if (ost->sync_ist != ist_table[ost->source_index])
+            if (ost->sync_ist != &input_streams[ost->source_index])
                 fprintf(stderr, " [sync #%d.%d]",
                         ost->sync_ist->file_index,
                         ost->sync_ist->st->index);
@@ -2539,12 +2509,12 @@ static int transcode(AVFormatContext **output_files,
             double ipts, opts;
             ost = ost_table[i];
             os = output_files[ost->file_index];
-            ist = ist_table[ost->source_index];
+            ist = &input_streams[ost->source_index];
             if(ist->is_past_recording_time || no_packet[ist->file_index])
                 continue;
                 opts = ost->st->pts.val * av_q2d(ost->st->time_base);
             ipts = (double)ist->pts;
-            if (!file_table[ist->file_index].eof_reached){
+            if (!input_files[ist->file_index].eof_reached){
                 if(ipts < ipts_min) {
                     ipts_min = ipts;
                     if(input_sync ) file_index = ist->file_index;
@@ -2575,7 +2545,7 @@ static int transcode(AVFormatContext **output_files,
             break;
 
         /* read a frame from it and output it in the fifo */
-        is = input_files[file_index];
+        is = input_files[file_index].ctx;
         ret= av_read_frame(is, &pkt);
         if(ret == AVERROR(EAGAIN)){
             no_packet[file_index]=1;
@@ -2583,7 +2553,7 @@ static int transcode(AVFormatContext **output_files,
             continue;
         }
         if (ret < 0) {
-            file_table[file_index].eof_reached = 1;
+            input_files[file_index].eof_reached = 1;
             if (opt_shortest)
                 break;
             else
@@ -2599,10 +2569,10 @@ static int transcode(AVFormatContext **output_files,
         }
         /* the following test is needed in case new streams appear
            dynamically in stream : we ignore them */
-        if (pkt.stream_index >= file_table[file_index].nb_streams)
+        if (pkt.stream_index >= input_files[file_index].nb_streams)
             goto discard_packet;
-        ist_index = file_table[file_index].ist_index + pkt.stream_index;
-        ist = ist_table[ist_index];
+        ist_index = input_files[file_index].ist_index + pkt.stream_index;
+        ist = &input_streams[ist_index];
         if (ist->discard)
             goto discard_packet;
 
@@ -2661,8 +2631,8 @@ static int transcode(AVFormatContext **output_files,
     }
 
     /* at the end of stream, we must flush the decoder buffers */
-    for(i=0;i<nb_istreams;i++) {
-        ist = ist_table[i];
+    for (i = 0; i < nb_input_streams; i++) {
+        ist = &input_streams[i];
         if (ist->decoding_needed) {
             output_packet(ist, i, ost_table, nb_ostreams, NULL);
         }
@@ -2692,8 +2662,8 @@ static int transcode(AVFormatContext **output_files,
     }
 
     /* close each decoder */
-    for(i=0;i<nb_istreams;i++) {
-        ist = ist_table[i];
+    for (i = 0; i < nb_input_streams; i++) {
+        ist = &input_streams[i];
         if (ist->decoding_needed) {
             avcodec_close(ist->st->codec);
         }
@@ -2704,15 +2674,7 @@ static int transcode(AVFormatContext **output_files,
 
  fail:
     av_freep(&bit_buffer);
-    av_free(file_table);
 
-    if (ist_table) {
-        for(i=0;i<nb_istreams;i++) {
-            ist = ist_table[i];
-            av_free(ist);
-        }
-        av_free(ist_table);
-    }
     if (ost_table) {
         for(i=0;i<nb_ostreams;i++) {
             ost = ost_table[i];
@@ -3275,8 +3237,17 @@ static void opt_input_file(const char *filename)
     for(i=0;i<ic->nb_streams;i++) {
         AVStream *st = ic->streams[i];
         AVCodecContext *dec = st->codec;
+        AVInputStream *ist;
+
         dec->thread_count = thread_count;
         input_codecs = grow_array(input_codecs, sizeof(*input_codecs), &nb_input_codecs, nb_input_codecs + 1);
+
+        input_streams = grow_array(input_streams, sizeof(*input_streams), &nb_input_streams, nb_input_streams + 1);
+        ist = &input_streams[nb_input_streams - 1];
+        ist->st = st;
+        ist->file_index = nb_input_files;
+        ist->discard = 1;
+
         switch (dec->codec_type) {
         case AVMEDIA_TYPE_AUDIO:
             input_codecs[nb_input_codecs-1] = avcodec_find_decoder_by_name(audio_codec_name);
@@ -3343,13 +3314,15 @@ static void opt_input_file(const char *filename)
         }
     }
 
-    input_files[nb_input_files] = ic;
     input_files_ts_offset[nb_input_files] = input_ts_offset - (copy_ts ? 0 : timestamp);
     /* dump the file content */
     if (verbose >= 0)
         av_dump_format(ic, nb_input_files, filename, 0);
 
-    nb_input_files++;
+    input_files = grow_array(input_files, sizeof(*input_files), &nb_input_files, nb_input_files + 1);
+    input_files[nb_input_files - 1].ctx        = ic;
+    input_files[nb_input_files - 1].ist_index  = nb_input_streams - ic->nb_streams;
+    input_files[nb_input_files - 1].nb_streams = ic->nb_streams;
 
     video_channel = 0;
 
@@ -3374,7 +3347,7 @@ static void check_inputs(int *has_video_ptr,
     has_data = 0;
 
     for(j=0;j<nb_input_files;j++) {
-        ic = input_files[j];
+        ic = input_files[j].ctx;
         for(i=0;i<ic->nb_streams;i++) {
             AVCodecContext *enc = ic->streams[i]->codec;
             switch(enc->codec_type) {
@@ -4070,9 +4043,9 @@ static void opt_target(const char *arg)
             /* Try to determine PAL/NTSC by peeking in the input files */
             if(nb_input_files) {
                 int i, j;
-                for(j = 0; j < nb_input_files; j++) {
-                    for(i = 0; i < input_files[j]->nb_streams; i++) {
-                        AVCodecContext *c = input_files[j]->streams[i]->codec;
+                for (j = 0; j < nb_input_files; j++) {
+                    for (i = 0; i < input_files[j].ctx->nb_streams; i++) {
+                        AVCodecContext *c = input_files[j].ctx->streams[i]->codec;
                         if(c->codec_type != AVMEDIA_TYPE_VIDEO)
                             continue;
                         fr = c->time_base.den * 1000 / c->time_base.num;

From 2cf8355f98681bdd726b739008acd5483f82f8d7 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 17:19:54 +0200
Subject: [PATCH 287/830] ffmpeg: get rid of useless AVInputFile.nb_streams.

It's a duplicate of AVFormatContext.nb_streams.
---
 ffmpeg.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 17e95de8cd..25192dedec 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -329,7 +329,6 @@ typedef struct AVInputFile {
     int eof_reached;      /* true if eof reached */
     int ist_index;        /* index of first stream in ist_table */
     int buffer_size;      /* current total buffer size */
-    int nb_streams;       /* nb streams we are aware of */
 } AVInputFile;
 
 static AVInputStream *input_streams = NULL;
@@ -1976,7 +1975,7 @@ static int transcode(AVFormatContext **output_files,
         int si = stream_maps[i].stream_index;
 
         if (fi < 0 || fi > nb_input_files - 1 ||
-            si < 0 || si > input_files[fi].nb_streams - 1) {
+            si < 0 || si > input_files[fi].ctx->nb_streams - 1) {
             fprintf(stderr,"Could not find input stream #%d.%d\n", fi, si);
             ret = AVERROR(EINVAL);
             goto fail;
@@ -1984,7 +1983,7 @@ static int transcode(AVFormatContext **output_files,
         fi = stream_maps[i].sync_file_index;
         si = stream_maps[i].sync_stream_index;
         if (fi < 0 || fi > nb_input_files - 1 ||
-            si < 0 || si > input_files[fi].nb_streams - 1) {
+            si < 0 || si > input_files[fi].ctx->nb_streams - 1) {
             fprintf(stderr,"Could not find sync stream #%d.%d\n", fi, si);
             ret = AVERROR(EINVAL);
             goto fail;
@@ -2569,7 +2568,7 @@ static int transcode(AVFormatContext **output_files,
         }
         /* the following test is needed in case new streams appear
            dynamically in stream : we ignore them */
-        if (pkt.stream_index >= input_files[file_index].nb_streams)
+        if (pkt.stream_index >= input_files[file_index].ctx->nb_streams)
             goto discard_packet;
         ist_index = input_files[file_index].ist_index + pkt.stream_index;
         ist = &input_streams[ist_index];
@@ -3322,7 +3321,6 @@ static void opt_input_file(const char *filename)
     input_files = grow_array(input_files, sizeof(*input_files), &nb_input_files, nb_input_files + 1);
     input_files[nb_input_files - 1].ctx        = ic;
     input_files[nb_input_files - 1].ist_index  = nb_input_streams - ic->nb_streams;
-    input_files[nb_input_files - 1].nb_streams = ic->nb_streams;
 
     video_channel = 0;
 

From 656566d7a4fef9680e483f1015d4cc242d825202 Mon Sep 17 00:00:00 2001
From: John Stebbins <stebbins@jetheaddev.com>
Date: Mon, 23 May 2011 14:26:54 -0700
Subject: [PATCH 288/830] Fix end time of last chapter in compute_chapters_end

Parentheses are misplaced in calculation of max_time.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index ad226016aa..8cdd8b3805 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2110,7 +2110,7 @@ enum CodecID av_codec_get_id(const AVCodecTag * const *tags, unsigned int tag)
 static void compute_chapters_end(AVFormatContext *s)
 {
     unsigned int i, j;
-    int64_t max_time = s->duration + (s->start_time == AV_NOPTS_VALUE) ? 0 : s->start_time;
+    int64_t max_time = s->duration + ((s->start_time == AV_NOPTS_VALUE) ? 0 : s->start_time);
 
     for (i = 0; i < s->nb_chapters; i++)
         if (s->chapters[i]->end == AV_NOPTS_VALUE) {

From 67540af7baa5c4064753861be217ac8f7c8df997 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 23 May 2011 19:23:55 +0200
Subject: [PATCH 289/830] tty: replace AVFormatParameters.sample_rate abuse
 with a private option.

---
 libavformat/tty.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/libavformat/tty.c b/libavformat/tty.c
index ea8d0c7ece..bc6058d857 100644
--- a/libavformat/tty.c
+++ b/libavformat/tty.c
@@ -26,12 +26,13 @@
 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/avstring.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
 #include "avformat.h"
 #include "sauce.h"
 
-#define LINE_RATE 6000 /* characters per second */
-
 typedef struct {
+    AVClass *class;
     int chars_per_frame;
     uint64_t fsize;  /**< file size less metadata buffer */
 } TtyDemuxContext;
@@ -86,7 +87,11 @@ static int read_header(AVFormatContext *avctx,
     }
 
     /* simulate tty display speed */
-    s->chars_per_frame = FFMAX(av_q2d(st->time_base) * (ap->sample_rate ? ap->sample_rate : LINE_RATE), 1);
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->sample_rate)
+        s->chars_per_frame = ap->sample_rate;
+#endif
+    s->chars_per_frame = FFMAX(av_q2d(st->time_base)*s->chars_per_frame, 1);
 
     if (avctx->pb->seekable) {
         s->fsize = avio_size(avctx->pb);
@@ -124,6 +129,18 @@ static int read_packet(AVFormatContext *avctx, AVPacket *pkt)
     return 0;
 }
 
+static const AVOption options[] = {
+    { "chars_per_frame", "", offsetof(TtyDemuxContext, chars_per_frame), FF_OPT_TYPE_INT, {.dbl = 6000}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM},
+    { NULL },
+};
+
+static const AVClass tty_demuxer_class = {
+    .class_name     = "TTY demuxer",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_tty_demuxer = {
     .name           = "tty",
     .long_name      = NULL_IF_CONFIG_SMALL("Tele-typewriter"),
@@ -131,4 +148,5 @@ AVInputFormat ff_tty_demuxer = {
     .read_header    = read_header,
     .read_packet    = read_packet,
     .extensions     = "ans,art,asc,diz,ice,nfo,txt,vt",
+    .priv_class     = &tty_demuxer_class,
 };

From b8e3b7414733a685bd1f62d65cfdfafac6cc9027 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Wed, 25 May 2011 18:29:55 +0200
Subject: [PATCH 290/830] vf_mp: fix name of the remove-logo filter referenced
 in filters.texi

Correct the name from "remove_logo" to "remove-logo".
---
 doc/filters.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 18f46f96df..719d94f45a 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -761,7 +761,7 @@ The list of the currently supported filters follows:
 @item pullup
 @item qp
 @item rectangle
-@item remove_logo
+@item remove-logo
 @item rgbtest
 @item rotate
 @item sab

From 364889cf9c1f3c5e816a30d30d714a84765cfc29 Mon Sep 17 00:00:00 2001
From: Jean-Tiare Le Bigot <jtlebi@gmail>
Date: Wed, 25 May 2011 20:12:17 +0200
Subject: [PATCH 291/830] Return -1 on invalid input instead of crashing.

---
 libswscale/utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/utils.c b/libswscale/utils.c
index 56920a10a3..fada19210b 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -710,7 +710,7 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange
 
 int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation)
 {
-    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
+    if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
 
     *inv_table = c->srcColorspaceTable;
     *table     = c->dstColorspaceTable;

From 4a056160bef8578dc1e370e2445f61f2459e3863 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 25 May 2011 20:49:52 +0200
Subject: [PATCH 292/830] swscale: remove duplication of rgb24toyv12_c()

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/rgb2rgb.h              |  7 +++++
 libswscale/rgb2rgb_template.c     |  2 +-
 libswscale/x86/rgb2rgb_template.c | 47 +------------------------------
 3 files changed, 9 insertions(+), 47 deletions(-)

diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 6688c5967e..e31d9cc9e9 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -68,6 +68,13 @@ void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, long src_size);
 void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, long src_size);
 void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, long src_size);
 
+void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst,
+                   uint8_t *udst, uint8_t *vdst,
+                   long width, long height,
+                   long lumStride, long chromStride,
+                   long srcStride);
+
+
 #if LIBSWSCALE_VERSION_MAJOR < 1
 /* deprecated, use the public versions in swscale.h */
 attribute_deprecated void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index eeb9c12795..c71417d7e1 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -633,7 +633,7 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
  * others are ignored in the C version.
  * FIXME: Write HQ version.
  */
-static inline void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst,
+void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst,
                                  uint8_t *udst, uint8_t *vdst,
                                  long width, long height,
                                  long lumStride, long chromStride,
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index c675f24aa0..8e6ce8586a 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -1976,52 +1976,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
                      SFENCE"     \n\t"
                      :::"memory");
 
-    for (; y<height; y+=2) {
-        long i;
-        for (i=0; i<chromWidth; i++) {
-            unsigned int b = src[6*i+0];
-            unsigned int g = src[6*i+1];
-            unsigned int r = src[6*i+2];
-
-            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            unsigned int V  =  ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
-            unsigned int U  =  ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
-
-            udst[i]     = U;
-            vdst[i]     = V;
-            ydst[2*i]   = Y;
-
-            b = src[6*i+3];
-            g = src[6*i+4];
-            r = src[6*i+5];
-
-            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            ydst[2*i+1]     = Y;
-        }
-        ydst += lumStride;
-        src  += srcStride;
-
-        for (i=0; i<chromWidth; i++) {
-            unsigned int b = src[6*i+0];
-            unsigned int g = src[6*i+1];
-            unsigned int r = src[6*i+2];
-
-            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-
-            ydst[2*i]     = Y;
-
-            b = src[6*i+3];
-            g = src[6*i+4];
-            r = src[6*i+5];
-
-            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            ydst[2*i+1]     = Y;
-        }
-        udst += chromStride;
-        vdst += chromStride;
-        ydst += lumStride;
-        src  += srcStride;
-    }
+     rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
 }
 
 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,

From 271c869cc3285dac2b6f2663a87c70bf3ba2b04f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 25 May 2011 19:08:29 +0300
Subject: [PATCH 293/830] rtmp: Don't try to do av_malloc(0)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some received packets can have size 0. The return value from
av_malloc(0) may be NULL, which is ok if the size was 0. On
OS X, however, the returned pointer is non-null but leads to
crashes when trying to free it.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmppkt.c   | 2 ++
 libavformat/rtmpproto.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavformat/rtmppkt.c b/libavformat/rtmppkt.c
index 63b0628799..93790eb525 100644
--- a/libavformat/rtmppkt.c
+++ b/libavformat/rtmppkt.c
@@ -233,9 +233,11 @@ int ff_rtmp_packet_write(URLContext *h, RTMPPacket *pkt,
 int ff_rtmp_packet_create(RTMPPacket *pkt, int channel_id, RTMPPacketType type,
                           int timestamp, int size)
 {
+    if (size) {
     pkt->data = av_malloc(size);
     if (!pkt->data)
         return AVERROR(ENOMEM);
+    }
     pkt->data_size  = size;
     pkt->channel_id = channel_id;
     pkt->type       = type;
diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 70e4b142d6..f499bd3b71 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -683,7 +683,7 @@ static int get_packet(URLContext *s, int for_header)
         return AVERROR_EOF;
 
     for (;;) {
-        RTMPPacket rpkt;
+        RTMPPacket rpkt = { 0 };
         if ((ret = ff_rtmp_packet_read(rt->stream, &rpkt,
                                        rt->chunk_size, rt->prev_pkt[0])) <= 0) {
             if (ret == 0) {

From 48b1fb1397281dd16df909b6f33c2c36e5626f0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 25 May 2011 22:01:15 +0300
Subject: [PATCH 294/830] rtmp: Reindent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmppkt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavformat/rtmppkt.c b/libavformat/rtmppkt.c
index 93790eb525..35ef7fdaae 100644
--- a/libavformat/rtmppkt.c
+++ b/libavformat/rtmppkt.c
@@ -234,9 +234,9 @@ int ff_rtmp_packet_create(RTMPPacket *pkt, int channel_id, RTMPPacketType type,
                           int timestamp, int size)
 {
     if (size) {
-    pkt->data = av_malloc(size);
-    if (!pkt->data)
-        return AVERROR(ENOMEM);
+        pkt->data = av_malloc(size);
+        if (!pkt->data)
+            return AVERROR(ENOMEM);
     }
     pkt->data_size  = size;
     pkt->channel_id = channel_id;

From 80068da3a0bafc7e24ce6b1f91cefec7d793b4d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 25 May 2011 16:47:09 +0300
Subject: [PATCH 295/830] avoptions: Return explicitly NAN or {0,0} if the
 option isn't found
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This actually matches what av_get_double did earlier; the
0.0/0.0 division was intentional, for producing NAN.

Still keeping the check for the return value from
av_get_number, for clarity.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavutil/opt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavutil/opt.c b/libavutil/opt.c
index 9e06b01c52..74c39fee5f 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -291,7 +291,7 @@ double av_get_double(void *obj, const char *name, const AVOption **o_out)
     int den=1;
 
     if (av_get_number(obj, name, o_out, &num, &den, &intnum) < 0)
-        return -1;
+        return NAN;
     return num*intnum/den;
 }
 
@@ -302,7 +302,7 @@ AVRational av_get_q(void *obj, const char *name, const AVOption **o_out)
     int den=1;
 
     if (av_get_number(obj, name, o_out, &num, &den, &intnum) < 0)
-        return (AVRational){-1, 0};
+        return (AVRational){0, 0};
     if (num == 1.0 && (int)intnum == intnum)
         return (AVRational){intnum, den};
     else

From e92bdadb2d8bd5cb0b2c5bedf42a390371cc4824 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 25 May 2011 21:03:26 +0200
Subject: [PATCH 296/830] Revert removal of SWS flags from
 e66149e714006d099d1ebfcca3f22ca74fc7dcf4

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index e954592236..d026fb0bdc 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -95,6 +95,13 @@ const char *swscale_license(void);
 #define SWS_ACCURATE_RND      0x40000
 #define SWS_BITEXACT          0x80000
 
+#define SWS_CPU_CAPS_MMX      0x80000000
+#define SWS_CPU_CAPS_MMX2     0x20000000
+#define SWS_CPU_CAPS_3DNOW    0x40000000
+#define SWS_CPU_CAPS_ALTIVEC  0x10000000
+#define SWS_CPU_CAPS_BFIN     0x01000000
+#define SWS_CPU_CAPS_SSE2     0x02000000
+
 #define SWS_MAX_REDUCE_CUTOFF 0.002
 
 #define SWS_CS_ITU709         1

From b49728df732109c270af47f98b0d380cb1454c2a Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 25 May 2011 21:04:44 +0200
Subject: [PATCH 297/830] swscale: document SWS_CPU_CAPS*

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index d026fb0bdc..3d2a38f9d4 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -95,6 +95,7 @@ const char *swscale_license(void);
 #define SWS_ACCURATE_RND      0x40000
 #define SWS_BITEXACT          0x80000
 
+//The following flags are only provided for API/ABI compatibility they have no effect anymore
 #define SWS_CPU_CAPS_MMX      0x80000000
 #define SWS_CPU_CAPS_MMX2     0x20000000
 #define SWS_CPU_CAPS_3DNOW    0x40000000

From 30315a8d9c9bea98d54260956db58b6df56f9347 Mon Sep 17 00:00:00 2001
From: Alex Converse <aconverse@google.com>
Date: Mon, 9 May 2011 14:34:23 -0700
Subject: [PATCH 298/830] avformat: Add fpsprobesize as an AVOption.

---
 doc/APIchanges         | 3 +++
 libavformat/avformat.h | 5 +++++
 libavformat/options.c  | 1 +
 libavformat/utils.c    | 2 ++
 libavformat/version.h  | 4 ++--
 5 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 853975e76d..a99b798d80 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-05-25 - XXXXXXX - lavf 53.1.0 - avformat.h
+  Add fps_probe_size to AVFormatContext.
+
 2011-05-18 - 64150ff - lavc 53.4.0 - AVCodecContext.request_sample_fmt
   Add request_sample_fmt field to AVCodecContext.
 
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 11dbe8b2de..3a652a354b 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -817,6 +817,11 @@ typedef struct AVFormatContext {
      * - decoding: Unused.
      */
     int64_t start_time_realtime;
+
+    /**
+     * decoding: number of frames used to probe fps
+     */
+    int fps_probe_size;
 } AVFormatContext;
 
 typedef struct AVPacketList {
diff --git a/libavformat/options.c b/libavformat/options.c
index 22807c3058..377ba93b44 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -57,6 +57,7 @@ static const AVOption options[]={
 {"fdebug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, 0, INT_MAX, E|D, "fdebug"},
 {"ts", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_FDEBUG_TS }, INT_MIN, INT_MAX, E|D, "fdebug"},
 {"max_delay", "maximum muxing or demuxing delay in microseconds", OFFSET(max_delay), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, E|D},
+{"fpsprobesize", "number of frames used to probe fps", OFFSET(fps_probe_size), FF_OPT_TYPE_INT, -1, -1, INT_MAX-1, D},
 {NULL},
 };
 
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 8cdd8b3805..89ab1086fe 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2229,6 +2229,8 @@ int av_find_stream_info(AVFormatContext *ic)
                the correct fps */
             if (av_q2d(st->time_base) > 0.0005)
                 fps_analyze_framecount *= 2;
+            if (ic->fps_probe_size >= 0)
+                fps_analyze_framecount = ic->fps_probe_size;
             /* variable fps and no guess at the real fps */
             if(   tb_unreliable(st->codec) && !(st->r_frame_rate.num && st->avg_frame_rate.num)
                && st->info->duration_count < fps_analyze_framecount
diff --git a/libavformat/version.h b/libavformat/version.h
index b4fdb6f8b5..b2ac6d8181 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -24,8 +24,8 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVFORMAT_VERSION_MAJOR 53
-#define LIBAVFORMAT_VERSION_MINOR  0
-#define LIBAVFORMAT_VERSION_MICRO  3
+#define LIBAVFORMAT_VERSION_MINOR  1
+#define LIBAVFORMAT_VERSION_MICRO  0
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
                                                LIBAVFORMAT_VERSION_MINOR, \

From 19a686b8a373e93807880c63bd73303616c96b71 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Wed, 25 May 2011 12:36:41 -0700
Subject: [PATCH 299/830] APIchanges: Fill in git hash for fps_probe_size
 (30315a8)

---
 doc/APIchanges | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index a99b798d80..90dd0ec10e 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,7 +13,7 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
-2011-05-25 - XXXXXXX - lavf 53.1.0 - avformat.h
+2011-05-25 - 30315a8 - lavf 53.1.0 - avformat.h
   Add fps_probe_size to AVFormatContext.
 
 2011-05-18 - 64150ff - lavc 53.4.0 - AVCodecContext.request_sample_fmt

From 8c51620f219db0f408d2833c3e5d56cedaf0d95a Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Tue, 24 May 2011 16:28:45 -0700
Subject: [PATCH 300/830] Remove h263_msmpeg4 from MpegEncContext.

It was long ago superseded by msmpeg4_version.
---
 libavcodec/h263dec.c       | 8 +-------
 libavcodec/mpegvideo.c     | 2 +-
 libavcodec/mpegvideo.h     | 2 --
 libavcodec/mpegvideo_enc.c | 8 ++------
 4 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index fee5d32240..e2627fa8e4 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -69,33 +69,27 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx)
     case CODEC_ID_MPEG4:
         break;
     case CODEC_ID_MSMPEG4V1:
-        s->h263_msmpeg4 = 1;
         s->h263_pred = 1;
         s->msmpeg4_version=1;
         break;
     case CODEC_ID_MSMPEG4V2:
-        s->h263_msmpeg4 = 1;
         s->h263_pred = 1;
         s->msmpeg4_version=2;
         break;
     case CODEC_ID_MSMPEG4V3:
-        s->h263_msmpeg4 = 1;
         s->h263_pred = 1;
         s->msmpeg4_version=3;
         break;
     case CODEC_ID_WMV1:
-        s->h263_msmpeg4 = 1;
         s->h263_pred = 1;
         s->msmpeg4_version=4;
         break;
     case CODEC_ID_WMV2:
-        s->h263_msmpeg4 = 1;
         s->h263_pred = 1;
         s->msmpeg4_version=5;
         break;
     case CODEC_ID_VC1:
     case CODEC_ID_WMV3:
-        s->h263_msmpeg4 = 1;
         s->h263_pred = 1;
         s->msmpeg4_version=6;
         avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
@@ -655,7 +649,7 @@ retry:
         decode_slice(s);
     }
 
-    if (s->h263_msmpeg4 && s->msmpeg4_version<4 && s->pict_type==AV_PICTURE_TYPE_I)
+    if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type==AV_PICTURE_TYPE_I)
         if(!CONFIG_MSMPEG4_DECODER || msmpeg4_decode_ext_header(s, buf_size) < 0){
             s->error_status_table[s->mb_num-1]= AC_ERROR|DC_ERROR|MV_ERROR;
         }
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 09e813476a..f24e11910a 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1986,7 +1986,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
             }
 
             /* add dct residue */
-            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
+            if(s->encoding || !(   s->msmpeg4_version || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index b4afee29f9..6d5ab19283 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -207,7 +207,6 @@ typedef struct MpegEncContext {
 
 /* the following codec id fields are deprecated in favor of codec_id */
     int h263_plus;    ///< h263 plus headers
-    int h263_msmpeg4; ///< generate MSMPEG4 compatible stream (deprecated, use msmpeg4_version instead)
     int h263_flv;     ///< use flv h263 header
 
     enum CodecID codec_id;     /* see CODEC_ID_xxx */
@@ -831,4 +830,3 @@ void ff_wmv2_encode_mb(MpegEncContext * s,
                        int motion_x, int motion_y);
 
 #endif /* AVCODEC_MPEGVIDEO_H */
-
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 8df05c4c7a..a212149189 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -639,7 +639,6 @@ av_cold int MPV_encode_init(AVCodecContext *avctx)
         break;
     case CODEC_ID_MSMPEG4V2:
         s->out_format = FMT_H263;
-        s->h263_msmpeg4 = 1;
         s->h263_pred = 1;
         s->unrestricted_mv = 1;
         s->msmpeg4_version= 2;
@@ -648,7 +647,6 @@ av_cold int MPV_encode_init(AVCodecContext *avctx)
         break;
     case CODEC_ID_MSMPEG4V3:
         s->out_format = FMT_H263;
-        s->h263_msmpeg4 = 1;
         s->h263_pred = 1;
         s->unrestricted_mv = 1;
         s->msmpeg4_version= 3;
@@ -658,7 +656,6 @@ av_cold int MPV_encode_init(AVCodecContext *avctx)
         break;
     case CODEC_ID_WMV1:
         s->out_format = FMT_H263;
-        s->h263_msmpeg4 = 1;
         s->h263_pred = 1;
         s->unrestricted_mv = 1;
         s->msmpeg4_version= 4;
@@ -668,7 +665,6 @@ av_cold int MPV_encode_init(AVCodecContext *avctx)
         break;
     case CODEC_ID_WMV2:
         s->out_format = FMT_H263;
-        s->h263_msmpeg4 = 1;
         s->h263_pred = 1;
         s->unrestricted_mv = 1;
         s->msmpeg4_version= 5;
@@ -2771,7 +2767,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
 
     /* we need to initialize some time vars before we can encode b-frames */
     // RAL: Condition added for MPEG1VIDEO
-    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
+    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
         set_frame_distances(s);
     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
         ff_set_mpeg4_time(s);
@@ -2940,7 +2936,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
     case FMT_H263:
         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
             ff_wmv2_encode_picture_header(s, picture_number);
-        else if (CONFIG_MSMPEG4_ENCODER && s->h263_msmpeg4)
+        else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
             msmpeg4_encode_picture_header(s, picture_number);
         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
             mpeg4_encode_picture_header(s, picture_number);

From 7a54edaa16be3673a2ae8110d1777462c7f6ed7c Mon Sep 17 00:00:00 2001
From: ami_stuff <ami_stuff@o2.pl>
Date: Wed, 25 May 2011 19:53:38 +0200
Subject: [PATCH 301/830] Fail when lowres value is lower than 0

The attached patch fixes the crash which happens when the user passes a lowres value lower than 0 to FFplay.

ffplay -lowres -1 test.mpg

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 5354459866..95f41f315a 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -563,7 +563,7 @@ int attribute_align_arg avcodec_open(AVCodecContext *avctx, AVCodec *codec)
         }
     }
 
-    if (avctx->codec->max_lowres < avctx->lowres) {
+    if (avctx->codec->max_lowres < avctx->lowres || avctx->lowres < 0) {
         av_log(avctx, AV_LOG_ERROR, "The maximum value for lowres supported by the decoder is %d\n",
                avctx->codec->max_lowres);
         ret = AVERROR(EINVAL);

From feae7ad2f809c8e1007503031e231d1590514d66 Mon Sep 17 00:00:00 2001
From: ami_stuff <ami_stuff@o2.pl>
Date: Wed, 25 May 2011 23:38:16 +0200
Subject: [PATCH 302/830] mpegvideo_enc: use AV_LOG_ERROR instead of
 AV_LOG_INFO for two error messages

use AV_LOG_ERROR instead of AV_LOG_INFO for two error messages

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/mpegvideo_enc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 7c573b4406..0bc694ca3e 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -352,7 +352,7 @@ av_cold int MPV_encode_init(AVCodecContext *avctx)
     }
 
     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
-        av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
+        av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
         return -1;
     }
 
@@ -582,7 +582,7 @@ av_cold int MPV_encode_init(AVCodecContext *avctx)
     case CODEC_ID_H263:
         if (!CONFIG_H263_ENCODER)  return -1;
         if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format), s->width, s->height) == 8) {
-            av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
+            av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
             return -1;
         }
         s->out_format = FMT_H263;

From 7dc303a60ef7205d88a11ddbcb251f3237a3945e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 25 May 2011 20:49:52 +0200
Subject: [PATCH 303/830] swscale: Eliminate rgb24toyv12_c() duplication.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libswscale/rgb2rgb.h              |  4 +++
 libswscale/rgb2rgb_template.c     |  8 ++----
 libswscale/x86/rgb2rgb_template.c | 47 +------------------------------
 3 files changed, 8 insertions(+), 51 deletions(-)

diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index a8d5531cb8..4dd0d95674 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -68,6 +68,10 @@ void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, long src_size);
 void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, long src_size);
 void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, long src_size);
 
+void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                   uint8_t *vdst, long width, long height, long lumStride,
+                   long chromStride, long srcStride);
+
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index ea39be4f13..fed7e5e195 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -633,11 +633,9 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
  * others are ignored in the C version.
  * FIXME: Write HQ version.
  */
-static inline void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst,
-                                 uint8_t *udst, uint8_t *vdst,
-                                 long width, long height,
-                                 long lumStride, long chromStride,
-                                 long srcStride)
+void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                   uint8_t *vdst, long width, long height, long lumStride,
+                   long chromStride, long srcStride)
 {
     long y;
     const int chromWidth = width >> 1;
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index a083fc8674..ce635dfde4 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -1976,52 +1976,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
                      SFENCE"     \n\t"
                      :::"memory");
 
-    for (; y<height; y+=2) {
-        long i;
-        for (i=0; i<chromWidth; i++) {
-            unsigned int b = src[6*i+0];
-            unsigned int g = src[6*i+1];
-            unsigned int r = src[6*i+2];
-
-            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            unsigned int V  =  ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
-            unsigned int U  =  ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
-
-            udst[i]     = U;
-            vdst[i]     = V;
-            ydst[2*i]   = Y;
-
-            b = src[6*i+3];
-            g = src[6*i+4];
-            r = src[6*i+5];
-
-            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            ydst[2*i+1]     = Y;
-        }
-        ydst += lumStride;
-        src  += srcStride;
-
-        for (i=0; i<chromWidth; i++) {
-            unsigned int b = src[6*i+0];
-            unsigned int g = src[6*i+1];
-            unsigned int r = src[6*i+2];
-
-            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-
-            ydst[2*i]     = Y;
-
-            b = src[6*i+3];
-            g = src[6*i+4];
-            r = src[6*i+5];
-
-            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            ydst[2*i+1]     = Y;
-        }
-        udst += chromStride;
-        vdst += chromStride;
-        ydst += lumStride;
-        src  += srcStride;
-    }
+     rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
 }
 
 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,

From 6f7a280eadd719d0a5c9b72b63dfca61bb9605f5 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 25 May 2011 22:07:55 +0200
Subject: [PATCH 304/830] swscale: Remove orphaned, commented-out function
 declaration.

---
 libswscale/rgb2rgb.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 4dd0d95674..6d6154b23b 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -72,14 +72,6 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
                    uint8_t *vdst, long width, long height, long lumStride,
                    long chromStride, long srcStride);
 
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- * Chrominance data is only taken from every second line, others are ignored.
- * FIXME: Write high quality version.
- */
-//void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)

From a2ee2843c09a6116b090020eff8213b86ea98bdb Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 19 May 2011 19:41:19 +0200
Subject: [PATCH 305/830] doc: create separate section for audio encoders

Having a separate section for audio encoders simplifies navigation
and is slightly more consistent with the rest of the manual.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 doc/encoders.texi | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 760ec4bad9..d507b66f51 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -17,11 +17,15 @@ with the options @code{--enable-encoder=@var{ENCODER}} /
 The option @code{-codecs} of the ff* tools will display the list of
 enabled encoders.
 
-A description of some of the currently available encoders follows.
+@c man end ENCODERS
 
-@section Audio Encoders
+@chapter Audio Encoders
+@c man begin AUDIO ENCODERS
 
-@subsection ac3 and ac3_fixed
+A description of some of the currently available audio encoders
+follows.
+
+@section ac3 and ac3_fixed
 
 AC-3 audio encoders.
 
@@ -36,7 +40,7 @@ quality audio for a given bitrate. The @var{ac3_fixed} encoder is not the
 default codec for any of the output formats, so it must be specified explicitly
 using the option @code{-acodec ac3_fixed} in order to use it.
 
-@subheading AC-3 Metadata
+@subsection AC-3 Metadata
 
 The AC-3 metadata options are used to set parameters that describe the audio,
 but in most cases do not affect the audio encoding itself. Some of the options
@@ -55,7 +59,7 @@ documents.
 @item @uref{http://www.dolby.com/uploadedFiles/zz-_Shared_Assets/English_PDFs/Professional/46_DDEncodingGuidelines.pdf,Dolby Digital Professional Encoding Guidelines}
 @end itemize
 
-@subsubheading Metadata Control Options
+@subsubsection Metadata Control Options
 
 @table @option
 
@@ -72,7 +76,7 @@ Metadata values can be changed before encoding each frame.
 
 @end table
 
-@subsubheading Downmix Levels
+@subsubsection Downmix Levels
 
 @table @option
 
@@ -106,7 +110,7 @@ Silence Surround Channel(s)
 
 @end table
 
-@subsubheading Audio Production Information
+@subsubsection Audio Production Information
 Audio Production Information is optional information describing the mixing
 environment.  Either none or both of the fields are written to the bitstream.
 
@@ -140,7 +144,7 @@ Small Room
 
 @end table
 
-@subsubheading Other Metadata Options
+@subsubsection Other Metadata Options
 
 @table @option
 
@@ -195,7 +199,7 @@ Original Source (default)
 
 @end table
 
-@subsubheading Extended Bitstream Information
+@subsection Extended Bitstream Information
 The extended bitstream options are part of the Alternate Bit Stream Syntax as
 specified in Annex D of the A/52:2010 standard. It is grouped into 2 parts.
 If any one parameter in a group is specified, all values in that group will be
@@ -205,7 +209,7 @@ will use these values instead of the ones specified in the @code{center_mixlev}
 and @code{surround_mixlev} options if it supports the Alternate Bit Stream
 Syntax.
 
-@subsubheading Extended Bitstream Information - Part 1
+@subsubsection Extended Bitstream Information - Part 1
 
 @table @option
 
@@ -302,7 +306,7 @@ Silence Surround Channel(s)
 
 @end table
 
-@subsubheading Extended Bitstream Information - Part 2
+@subsubsection Extended Bitstream Information - Part 2
 
 @table @option
 
@@ -353,7 +357,7 @@ HDCD A/D Converter
 
 @end table
 
-@subheading Other AC-3 Encoding Options
+@subsection Other AC-3 Encoding Options
 
 @table @option
 
@@ -408,4 +412,4 @@ Selected by Encoder (default)
 
 @end table
 
-@c man end ENCODERS
+@c man end AUDIO ENCODERS

From 189db9c9829b970b3e28006c9f00d6960f71cff1 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 26 May 2011 01:55:12 +0200
Subject: [PATCH 306/830] ffv1: fix 16bits multithreading

fixes ticket237

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/ffv1.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index 4e0e5f7b62..a0a4a1d009 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -1146,6 +1146,7 @@ static int encode_slice(AVCodecContext *c, void *arg){
     int x= fs->slice_x;
     int y= fs->slice_y;
     AVFrame * const p= &f->picture;
+    const int ps= (c->bits_per_raw_sample>8)+1;
 
     if(f->colorspace==0){
         const int chroma_width = -((-width )>>f->chroma_h_shift);
@@ -1153,12 +1154,12 @@ static int encode_slice(AVCodecContext *c, void *arg){
         const int cx= x>>f->chroma_h_shift;
         const int cy= y>>f->chroma_v_shift;
 
-        encode_plane(fs, p->data[0] + x + y*p->linesize[0], width, height, p->linesize[0], 0);
+        encode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0);
 
-        encode_plane(fs, p->data[1] + cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
-        encode_plane(fs, p->data[2] + cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1);
+        encode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
+        encode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1);
     }else{
-        encode_rgb_frame(fs, (uint32_t*)(p->data[0]) + x + y*(p->linesize[0]/4), width, height, p->linesize[0]/4);
+        encode_rgb_frame(fs, (uint32_t*)(p->data[0]) + ps*x + y*(p->linesize[0]/4), width, height, p->linesize[0]/4);
     }
     emms_c();
 
@@ -1475,6 +1476,7 @@ static int decode_slice(AVCodecContext *c, void *arg){
     int height= fs->slice_height;
     int x= fs->slice_x;
     int y= fs->slice_y;
+    const int ps= (c->bits_per_raw_sample>8)+1;
     AVFrame * const p= &f->picture;
 
     av_assert1(width && height);
@@ -1483,12 +1485,12 @@ static int decode_slice(AVCodecContext *c, void *arg){
         const int chroma_height= -((-height)>>f->chroma_v_shift);
         const int cx= x>>f->chroma_h_shift;
         const int cy= y>>f->chroma_v_shift;
-        decode_plane(fs, p->data[0] + x + y*p->linesize[0], width, height, p->linesize[0], 0);
+        decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0);
 
-        decode_plane(fs, p->data[1] + cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
-        decode_plane(fs, p->data[2] + cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[2], 1);
+        decode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
+        decode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[2], 1);
     }else{
-        decode_rgb_frame(fs, (uint32_t*)p->data[0] + x + y*(p->linesize[0]/4), width, height, p->linesize[0]/4);
+        decode_rgb_frame(fs, (uint32_t*)p->data[0] + ps*x + y*(p->linesize[0]/4), width, height, p->linesize[0]/4);
     }
 
     emms_c();

From 478455d66b80e335bdabc00df5dee298d630cbab Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 25 May 2011 20:12:57 -0400
Subject: [PATCH 307/830] ac3enc: initialize all coefficients to zero.

Uninitialized coefficients were being used to generate exponents, some
of which actually ended up in the final stream.  Even though they were
just extra exponents that are not used by any decoder, it is still
better to have consistent output for testing. This also fixes valgrind
errors.
---
 libavcodec/ac3enc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 5014fdb753..ec3ffb30e4 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -583,8 +583,8 @@ static inline float calc_cpl_coord(float energy_ch, float energy_cpl)
 static void apply_channel_coupling(AC3EncodeContext *s)
 {
 #if CONFIG_AC3ENC_FLOAT
-    DECLARE_ALIGNED(16, float,   cpl_coords)      [AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16];
-    DECLARE_ALIGNED(16, int32_t, fixed_cpl_coords)[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16];
+    DECLARE_ALIGNED(16, float,   cpl_coords)      [AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
+    DECLARE_ALIGNED(16, int32_t, fixed_cpl_coords)[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
     int blk, ch, bnd, i, j;
     CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
     int num_cpl_coefs = s->num_cpl_subbands * 12;
@@ -2658,8 +2658,8 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
                      AC3_MAX_COEFS * sizeof(*s->bap_buffer),  alloc_fail);
     FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * channels *
                      AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * channels *
-                     AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail);
+    FF_ALLOCZ_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * channels *
+                      AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail);
     FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * channels *
                      AC3_MAX_COEFS * sizeof(*s->exp_buffer), alloc_fail);
     FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * channels *
@@ -2723,8 +2723,8 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
     }
 
     if (CONFIG_AC3ENC_FLOAT) {
-        FF_ALLOC_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels *
-                         AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail);
+        FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels *
+                          AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail);
         for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
             AC3Block *block = &s->blocks[blk];
             FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *

From 00931991849f2c7f250ea583c749493cf1b611fc Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 26 May 2011 01:40:56 +0200
Subject: [PATCH 308/830] swscale: Remove commented-out printf cruft.

---
 libswscale/swscale-test.c         | 6 ------
 libswscale/swscale_template.c     | 2 --
 libswscale/utils.c                | 1 -
 libswscale/x86/swscale_template.c | 1 -
 4 files changed, 10 deletions(-)

diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c
index 22d8a70d51..b5cf1d202e 100644
--- a/libswscale/swscale-test.c
+++ b/libswscale/swscale-test.c
@@ -58,15 +58,11 @@ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, i
     int x,y;
     uint64_t ssd=0;
 
-//printf("%d %d\n", w, h);
-
     for (y=0; y<h; y++) {
         for (x=0; x<w; x++) {
             int d= src1[x + y*stride1] - src2[x + y*stride2];
             ssd+= d*d;
-//printf("%d", abs(src1[x + y*stride1] - src2[x + y*stride2])/26 );
         }
-//printf("\n");
     }
     return ssd;
 }
@@ -162,8 +158,6 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
 
         goto end;
     }
-//    printf("test %X %X %X -> %X %X %X\n", (int)ref[0], (int)ref[1], (int)ref[2],
-//        (int)src[0], (int)src[1], (int)src[2]);
 
     printf(" %s %dx%d -> %s %3dx%3d flags=%2d",
            av_pix_fmt_descriptors[srcFormat].name, srcW, srcH,
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index cd6c8c7a65..e27d45be25 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -353,9 +353,7 @@ static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
         int j;
         int srcPos= filterPos[i];
         int val=0;
-        //printf("filterPos: %d\n", filterPos[i]);
         for (j=0; j<filterSize; j++) {
-            //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
             val += ((int)src[srcPos + j])*filter[filterSize*i + j];
         }
         //filter += hFilterSize;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 29fc975046..96b3207cdd 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -171,7 +171,6 @@ const char *sws_format_name(enum PixelFormat format)
 
 static double getSplineCoeff(double a, double b, double c, double d, double dist)
 {
-//    printf("%f %f %f %f %f\n", a,b,c,d,dist);
     if (dist<=1.0) return ((d*dist + c)*dist + b)*dist +a;
     else           return getSplineCoeff(        0.0,
                                           b+ 2.0*c + 3.0*d,
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 678060f3de..8e7f2411a1 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2036,7 +2036,6 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
 #endif
         );
         for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
-            //printf("%d %d %d\n", dstWidth, i, srcW);
             dst[i] = src1[srcW-1]*128;
             dst[i+VOFW] = src2[srcW-1]*128;
         }

From c7a973e44aba0a2e59fdd8957738c32b9fae872e Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Wed, 25 May 2011 21:47:31 +0200
Subject: [PATCH 309/830] swscale: revive SWS_CPU_CAPS until next major bump.

---
 libswscale/swscale.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 1babced737..6ab59c6a59 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -50,6 +50,9 @@
 #ifndef FF_API_SWS_GETCONTEXT
 #define FF_API_SWS_GETCONTEXT  (LIBSWSCALE_VERSION_MAJOR < 2)
 #endif
+#ifndef FF_API_SWS_CPU_CAPS
+#define FF_API_SWS_CPU_CAPS    (LIBSWSCALE_VERSION_MAJOR < 2)
+#endif
 
 /**
  * Returns the LIBSWSCALE_VERSION_INT constant.
@@ -95,6 +98,19 @@ const char *swscale_license(void);
 #define SWS_ACCURATE_RND      0x40000
 #define SWS_BITEXACT          0x80000
 
+#if FF_API_SWS_CPU_CAPS
+/**
+ * CPU caps are autodetected now, those flags
+ * are only provided for API compatibility.
+ */
+#define SWS_CPU_CAPS_MMX      0x80000000
+#define SWS_CPU_CAPS_MMX2     0x20000000
+#define SWS_CPU_CAPS_3DNOW    0x40000000
+#define SWS_CPU_CAPS_ALTIVEC  0x10000000
+#define SWS_CPU_CAPS_BFIN     0x01000000
+#define SWS_CPU_CAPS_SSE2     0x02000000
+#endif
+
 #define SWS_MAX_REDUCE_CUTOFF 0.002
 
 #define SWS_CS_ITU709         1

From 2341c97e8d737876693fc1e0ecfe7f9f6d2b7c57 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 07:25:41 +0200
Subject: [PATCH 310/830] libdc1394: fix compilation.

Add a forgotten comma and move options/class definition outside of
HAVE_LIBDC1394_1.
---
 libavdevice/libdc1394.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index 96e9e9b88b..336c465311 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -90,6 +90,21 @@ struct dc1394_frame_rate {
     { 0, 0 } /* gotta be the last one */
 };
 
+static const AVOption options[] = {
+#if HAVE_LIBDC1394_1
+    { "channel", "", offsetof(dc1394_data, channel), FF_OPT_TYPE_INT, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+#endif
+    { NULL },
+};
+
+static const AVClass libdc1394_class = {
+    .class_name = "libdc1394 indev",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+
 static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
                                      struct dc1394_frame_format **select_fmt, struct dc1394_frame_rate **select_fps)
 {
@@ -245,20 +260,6 @@ static int dc1394_v1_close(AVFormatContext * context)
     return 0;
 }
 
-static const AVOption options[] = {
-#if HAVE_LIBDC1394_1
-    { "channel", "", offsetof(dc1394_data, channel), FF_OPT_TYPE_INT, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
-#endif
-    { NULL },
-};
-
-static const AVClass libdc1394_class = {
-    .class_name = "libdc1394 indev",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
 #elif HAVE_LIBDC1394_2
 static int dc1394_v2_read_header(AVFormatContext *c, AVFormatParameters * ap)
 {
@@ -378,7 +379,7 @@ AVInputFormat ff_libdc1394_demuxer = {
     .read_header    = dc1394_v2_read_header,
     .read_packet    = dc1394_v2_read_packet,
     .read_close     = dc1394_v2_close,
-    .flags          = AVFMT_NOFILE
+    .flags          = AVFMT_NOFILE,
     .priv_class     = &libdc1394_class,
 };
 

From db3262b700092e4012ae7564aa29eba1624a398c Mon Sep 17 00:00:00 2001
From: Mike Williams <mike@mikebwilliams.com>
Date: Wed, 18 May 2011 11:03:10 -0400
Subject: [PATCH 311/830] ffserver: Fix a null pointer dereference as a result
 of the FF_API_MAX_STREAMS cleanup.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixed another dereference in the RTSP code.

Removed a useless variable.

Changed an unnecessary looping assignment to a simple assignment, as suggested
by Maksym.

Added fixes and tweaks suggested by Maksym Veremeyenko [verem@m1stereo.tv] and
Clément B.
---
 ffmpeg.c   |  1 +
 ffserver.c | 21 ++++++++++-----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 25192dedec..f27513da26 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -673,6 +673,7 @@ static int read_ffserver_streams(AVFormatContext *s, const char *filename)
         return err;
     /* copy stream format */
     s->nb_streams = 0;
+    s->streams = av_mallocz(sizeof(AVStream *) * ic->nb_streams);
     for(i=0;i<ic->nb_streams;i++) {
         AVStream *st;
         AVCodec *codec;
diff --git a/ffserver.c b/ffserver.c
index b4613af8fe..b95a854363 100644
--- a/ffserver.c
+++ b/ffserver.c
@@ -2229,11 +2229,11 @@ static int http_prepare_data(HTTPContext *c)
         av_metadata_set2(&c->fmt_ctx.metadata, "copyright", c->stream->copyright, 0);
         av_metadata_set2(&c->fmt_ctx.metadata, "title"    , c->stream->title    , 0);
 
+        c->fmt_ctx.streams = av_mallocz(sizeof(AVStream *) * c->stream->nb_streams);
+
         for(i=0;i<c->stream->nb_streams;i++) {
-            AVStream *st;
             AVStream *src;
-            st = av_mallocz(sizeof(AVStream));
-            c->fmt_ctx.streams[i] = st;
+            c->fmt_ctx.streams[i] = av_mallocz(sizeof(AVStream));
             /* if file or feed, then just take streams from FFStream struct */
             if (!c->stream->feed ||
                 c->stream->feed == c->stream)
@@ -2241,9 +2241,9 @@ static int http_prepare_data(HTTPContext *c)
             else
                 src = c->stream->feed->streams[c->stream->feed_streams[i]];
 
-            *st = *src;
-            st->priv_data = 0;
-            st->codec->frame_number = 0; /* XXX: should be done in
+            *(c->fmt_ctx.streams[i]) = *src;
+            c->fmt_ctx.streams[i]->priv_data = 0;
+            c->fmt_ctx.streams[i]->codec->frame_number = 0; /* XXX: should be done in
                                            AVStream, not in codec */
         }
         /* set output format parameters */
@@ -3385,6 +3385,9 @@ static int rtp_new_av_stream(HTTPContext *c,
     if (!st)
         goto fail;
     ctx->nb_streams = 1;
+    ctx->streams = av_mallocz(sizeof(AVStream *) * ctx->nb_streams);
+    if (!ctx->streams)
+      goto fail;
     ctx->streams[0] = st;
 
     if (!c->stream->feed ||
@@ -3765,11 +3768,7 @@ static void build_feed_streams(void)
             }
             s->oformat = feed->fmt;
             s->nb_streams = feed->nb_streams;
-            for(i=0;i<s->nb_streams;i++) {
-                AVStream *st;
-                st = feed->streams[i];
-                s->streams[i] = st;
-            }
+            s->streams = feed->streams;
             av_set_parameters(s, NULL);
             if (av_write_header(s) < 0) {
                 http_log("Container doesn't supports the required parameters\n");

From 2159a245738cfec80dfcdeba8d2fbdc2db0f711c Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 15:16:19 -0400
Subject: [PATCH 312/830] swscale: remove if(accurate_rnd) branch from
 functions.

---
 libswscale/x86/swscale_template.c | 58 ++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 8e7f2411a1..442549e025 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -827,11 +827,10 @@
 #define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
 
 
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
 {
-        if (c->flags & SWS_ACCURATE_RND) {
             if (uDest) {
                 YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
                 YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
@@ -841,7 +840,12 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
             }
 
             YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-        } else {
+}
+
+static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
+                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+{
             if (uDest) {
                 YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
                 YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
@@ -851,10 +855,9 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
             }
 
             YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-        }
 }
 
-static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
+static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
 {
         long p= 4;
@@ -862,7 +865,6 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
         uint8_t *dst[4]= {aDest, dest, uDest, vDest};
         x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
 
-        if (c->flags & SWS_ACCURATE_RND) {
             while(p--) {
                 if (dst[p]) {
                     __asm__ volatile(
@@ -873,7 +875,16 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
                     );
                 }
             }
-        } else {
+}
+
+static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+{
+    long p= 4;
+    const uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
+    uint8_t *dst[4]= {aDest, dest, uDest, vDest};
+    x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
+
             while(p--) {
                 if (dst[p]) {
                     __asm__ volatile(
@@ -884,21 +895,19 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
                     );
                 }
             }
-        }
 }
 
 
 /**
  * vertical scale YV12 to RGB
  */
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                        const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
 
-        if (c->flags & SWS_ACCURATE_RND) {
             switch(c->dstFormat) {
             case PIX_FMT_RGB32:
                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
@@ -979,8 +988,20 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                 YSCALEYUV2PACKEDX_END
                 return;
             }
-        } else {
-            switch(c->dstFormat) {
+
+    yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
+                   chrFilter, chrSrc, chrFilterSize,
+                   alpSrc, dest, dstW, dstY);
+}
+
+static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
+        switch(c->dstFormat) {
             case PIX_FMT_RGB32:
                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                     YSCALEYUV2PACKEDX
@@ -1053,7 +1074,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                 YSCALEYUV2PACKEDX_END
                 return;
             }
-        }
 
     yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
                    chrFilter, chrSrc, chrFilterSize,
@@ -2130,11 +2150,17 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     enum PixelFormat srcFormat = c->srcFormat;
 
     if (!(c->flags & SWS_BITEXACT)) {
-        c->yuv2yuv1     = RENAME(yuv2yuv1    );
-        c->yuv2yuvX     = RENAME(yuv2yuvX    );
+        if (c->flags & SWS_ACCURATE_RND) {
+            c->yuv2yuv1     = RENAME(yuv2yuv1_ar    );
+            c->yuv2yuvX     = RENAME(yuv2yuvX_ar    );
+            c->yuv2packedX  = RENAME(yuv2packedX_ar );
+        } else {
+            c->yuv2yuv1     = RENAME(yuv2yuv1    );
+            c->yuv2yuvX     = RENAME(yuv2yuvX    );
+            c->yuv2packedX  = RENAME(yuv2packedX );
+        }
         c->yuv2packed1  = RENAME(yuv2packed1 );
         c->yuv2packed2  = RENAME(yuv2packed2 );
-        c->yuv2packedX  = RENAME(yuv2packedX );
     }
 
     c->hScale       = RENAME(hScale      );

From b6cac9b3bfba262f33875c549382b816c8e8cf26 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 15:18:40 -0400
Subject: [PATCH 313/830] swscale: remove if(full_chr_int) from yuv2packed1().

If that flag is set, swScale() already proxies the call to
yuv2rgbXinC_full(). Therefore, this flag is never set when
yuv2packed1() is called.
---
 libswscale/swscale_template.c     | 5 -----
 libswscale/x86/swscale_template.c | 5 -----
 2 files changed, 10 deletions(-)

diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index e27d45be25..aeeb42815e 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -127,11 +127,6 @@ static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
     const int yalpha= 4096; //FIXME ...
 
-    if (flags&SWS_FULL_CHR_H_INT) {
-        c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
-        return;
-    }
-
     if (uvalpha < 2048) {
         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
     } else {
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 442549e025..355a1f5cd5 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1230,11 +1230,6 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
 {
         const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
-        if (flags&SWS_FULL_CHR_H_INT) {
-            c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
-            return;
-        }
-
         if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
             switch(dstFormat) {
             case PIX_FMT_RGB32:

From acb96bc268014efbf2353f3285106cfdabe3ff54 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 16:02:47 -0400
Subject: [PATCH 314/830] swscale: remove if (c->dstFormat) branch from
 yuv2packed[12X]().

This allows cutting up the function into much smaller and
easier-to-maintain chunks.
---
 libswscale/x86/swscale_template.c | 426 ++++++++++++++++++------------
 1 file changed, 262 insertions(+), 164 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 355a1f5cd5..2b6d7e4a45 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -901,15 +901,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
 /**
  * vertical scale YV12 to RGB
  */
-static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                        const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
 
-            switch(c->dstFormat) {
-            case PIX_FMT_RGB32:
                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                     YSCALEYUV2PACKEDX_ACCURATE
                     YSCALEYUV2RGBX
@@ -932,8 +930,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
 
                     YSCALEYUV2PACKEDX_END
                 }
-                return;
-            case PIX_FMT_BGR24:
+}
+
+static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX_ACCURATE
                 YSCALEYUV2RGBX
                 "pxor %%mm7, %%mm7 \n\t"
@@ -947,8 +952,16 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
                 "r" (dest), "m" (dstW_reg)
                 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
                 );
-                return;
-            case PIX_FMT_RGB555:
+}
+
+
+static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX_ACCURATE
                 YSCALEYUV2RGBX
                 "pxor %%mm7, %%mm7 \n\t"
@@ -961,8 +974,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
 
                 WRITERGB15(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_RGB565:
+}
+
+static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX_ACCURATE
                 YSCALEYUV2RGBX
                 "pxor %%mm7, %%mm7 \n\t"
@@ -975,8 +995,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
 
                 WRITERGB16(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_YUYV422:
+}
+
+static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX_ACCURATE
                 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 
@@ -986,23 +1013,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
                 "psraw $3, %%mm7    \n\t"
                 WRITEYUY2(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            }
-
-    yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                   chrFilter, chrSrc, chrFilterSize,
-                   alpSrc, dest, dstW, dstY);
 }
 
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                        const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
 
-        switch(c->dstFormat) {
-            case PIX_FMT_RGB32:
                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                     YSCALEYUV2PACKEDX
                     YSCALEYUV2RGBX
@@ -1019,8 +1038,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                     WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                     YSCALEYUV2PACKEDX_END
                 }
-                return;
-            case PIX_FMT_BGR24:
+}
+
+static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX
                 YSCALEYUV2RGBX
                 "pxor                    %%mm7, %%mm7       \n\t"
@@ -1033,8 +1059,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                 "r" (dest),  "m" (dstW_reg)
                 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
                 );
-                return;
-            case PIX_FMT_RGB555:
+}
+
+static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX
                 YSCALEYUV2RGBX
                 "pxor %%mm7, %%mm7 \n\t"
@@ -1047,8 +1080,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
 
                 WRITERGB15(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_RGB565:
+}
+
+static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX
                 YSCALEYUV2RGBX
                 "pxor %%mm7, %%mm7 \n\t"
@@ -1061,8 +1101,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
 
                 WRITERGB16(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_YUYV422:
+}
+
+static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX
                 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 
@@ -1072,23 +1119,14 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                 "psraw $3, %%mm7    \n\t"
                 WRITEYUY2(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            }
-
-    yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                   chrFilter, chrSrc, chrFilterSize,
-                   alpSrc, dest, dstW, dstY);
 }
 
 /**
  * vertical bilinear scale YV12 to RGB
  */
-static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                           const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
 {
-        switch(c->dstFormat) {
-        //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-        case PIX_FMT_RGB32:
             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
                 __asm__ volatile(
@@ -1145,8 +1183,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                     "a" (&c->redDither)
                 );
             }
-            return;
-        case PIX_FMT_BGR24:
+}
+
+static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
@@ -1159,8 +1201,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                 "a" (&c->redDither)
             );
-            return;
-        case PIX_FMT_RGB555:
+}
+
+static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
@@ -1181,8 +1227,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                 "a" (&c->redDither)
             );
-            return;
-        case PIX_FMT_RGB565:
+}
+
+static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
@@ -1202,8 +1252,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                 "a" (&c->redDither)
             );
-            return;
-        case PIX_FMT_YUYV422:
+}
+
+static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov %4, %%"REG_b"                        \n\t"
@@ -1215,24 +1269,17 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                 "a" (&c->redDither)
             );
-            return;
-        }
-
-    yuv2packed2_c(c, buf0, buf1, uvbuf0, uvbuf1, abuf0, abuf1,
-                  dest, dstW, yalpha, uvalpha, y);
 }
 
 /**
  * YV12 to RGB without scaling or interpolating
  */
-static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                           const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
 {
         const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
         if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-            switch(dstFormat) {
-            case PIX_FMT_RGB32:
                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                     __asm__ volatile(
                         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1262,8 +1309,45 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                         "a" (&c->redDither)
                     );
                 }
-                return;
-            case PIX_FMT_BGR24:
+        } else {
+                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+                    __asm__ volatile(
+                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                        "mov        %4, %%"REG_b"               \n\t"
+                        "push %%"REG_BP"                        \n\t"
+                        YSCALEYUV2RGB1b(%%REGBP, %5)
+                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
+                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                        "pop %%"REG_BP"                         \n\t"
+                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                        "a" (&c->redDither)
+                    );
+                } else {
+                    __asm__ volatile(
+                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                        "mov        %4, %%"REG_b"               \n\t"
+                        "push %%"REG_BP"                        \n\t"
+                        YSCALEYUV2RGB1b(%%REGBP, %5)
+                        "pcmpeqd %%mm7, %%mm7                   \n\t"
+                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                        "pop %%"REG_BP"                         \n\t"
+                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                        "a" (&c->redDither)
+                    );
+                }
+        }
+}
+
+static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                     "mov        %4, %%"REG_b"               \n\t"
@@ -1277,8 +1361,29 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                     :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                     "a" (&c->redDither)
                 );
-                return;
-            case PIX_FMT_RGB555:
+        } else {
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1b(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+        }
+}
+
+static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                     "mov        %4, %%"REG_b"               \n\t"
@@ -1298,8 +1403,35 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                     :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                     "a" (&c->redDither)
                 );
-                return;
-            case PIX_FMT_RGB565:
+        } else {
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1b(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+        }
+}
+
+static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                     "mov        %4, %%"REG_b"               \n\t"
@@ -1320,8 +1452,36 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                     :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                     "a" (&c->redDither)
                 );
-                return;
-            case PIX_FMT_YUYV422:
+        } else {
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1b(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+        }
+}
+
+static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                     "mov        %4, %%"REG_b"               \n\t"
@@ -1334,100 +1494,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                     :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                     "a" (&c->redDither)
                 );
-                return;
-            }
         } else {
-            switch(dstFormat) {
-            case PIX_FMT_RGB32:
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1b(%%REGBP, %5)
-                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                } else {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1b(%%REGBP, %5)
-                        "pcmpeqd %%mm7, %%mm7                   \n\t"
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                }
-                return;
-            case PIX_FMT_BGR24:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB555:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB565:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-
-                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_YUYV422:
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                     "mov        %4, %%"REG_b"               \n\t"
@@ -1440,12 +1507,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                     :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                     "a" (&c->redDither)
                 );
-                return;
-            }
         }
-
-    yuv2packed1_c(c, buf0, uvbuf0, uvbuf1, abuf0, dest,
-                  dstW, uvalpha, dstFormat, flags, y);
 }
 
 //FIXME yuy2* can read up to 7 samples too much
@@ -2148,14 +2210,50 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         if (c->flags & SWS_ACCURATE_RND) {
             c->yuv2yuv1     = RENAME(yuv2yuv1_ar    );
             c->yuv2yuvX     = RENAME(yuv2yuvX_ar    );
-            c->yuv2packedX  = RENAME(yuv2packedX_ar );
+            switch (c->dstFormat) {
+            case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
+            case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
+            case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X_ar);  break;
+            case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X_ar);  break;
+            case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
+            default: break;
+            }
         } else {
             c->yuv2yuv1     = RENAME(yuv2yuv1    );
             c->yuv2yuvX     = RENAME(yuv2yuvX    );
-            c->yuv2packedX  = RENAME(yuv2packedX );
+            switch (c->dstFormat) {
+            case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
+            case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
+            case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X);  break;
+            case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X);  break;
+            case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
+            default: break;
+            }
+        }
+        switch (c->dstFormat) {
+        case PIX_FMT_RGB32:
+                c->yuv2packed1 = RENAME(yuv2rgb32_1);
+                c->yuv2packed2 = RENAME(yuv2rgb32_2);
+                break;
+        case PIX_FMT_BGR24:
+                c->yuv2packed1 = RENAME(yuv2bgr24_1);
+                c->yuv2packed2 = RENAME(yuv2bgr24_2);
+                break;
+        case PIX_FMT_RGB555:
+                c->yuv2packed1 = RENAME(yuv2rgb555_1);
+                c->yuv2packed2 = RENAME(yuv2rgb555_2);
+                break;
+        case PIX_FMT_RGB565:
+                c->yuv2packed1 = RENAME(yuv2rgb565_1);
+                c->yuv2packed2 = RENAME(yuv2rgb565_2);
+                break;
+        case PIX_FMT_YUYV422:
+                c->yuv2packed1 = RENAME(yuv2yuyv422_1);
+                c->yuv2packed2 = RENAME(yuv2yuyv422_2);
+                break;
+        default:
+                break;
         }
-        c->yuv2packed1  = RENAME(yuv2packed1 );
-        c->yuv2packed2  = RENAME(yuv2packed2 );
     }
 
     c->hScale       = RENAME(hScale      );

From 71d9c33c8693b3cdd1122765ec2daabc07ab116a Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 16:45:41 -0400
Subject: [PATCH 315/830] swscale: remove duplicate mmx/mmx2 functions if they
 are identical.

---
 libswscale/x86/swscale_template.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 2b6d7e4a45..4ee32fff7e 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1510,6 +1510,7 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, co
         }
 }
 
+#if !COMPILE_TEMPLATE_MMX2
 //FIXME yuy2* can read up to 7 samples too much
 
 static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
@@ -1691,6 +1692,7 @@ static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
 {
     RENAME(nvXXtoUV)(dstV, dstU, src1, width);
 }
+#endif /* !COMPILE_TEMPLATE_MMX2 */
 
 static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
 {
@@ -1825,7 +1827,7 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
 }
 
-
+#if !COMPILE_TEMPLATE_MMX2
 // bilinear / bicubic scaling
 static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
                                   const int16_t *filter, const int16_t *filterPos, long filterSize)
@@ -1980,6 +1982,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
         );
     }
 }
+#endif /* !COMPILE_TEMPLATE_MMX2 */
 
 #if COMPILE_TEMPLATE_MMX2
 static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
@@ -2256,7 +2259,9 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         }
     }
 
+#if !COMPILE_TEMPLATE_MMX2
     c->hScale       = RENAME(hScale      );
+#endif /* !COMPILE_TEMPLATE_MMX2 */
 
     // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
 #if COMPILE_TEMPLATE_MMX2
@@ -2272,6 +2277,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     }
 #endif /* COMPILE_TEMPLATE_MMX2 */
 
+#if !COMPILE_TEMPLATE_MMX2
     switch(srcFormat) {
         case PIX_FMT_YUYV422  : c->chrToYV12 = RENAME(yuy2ToUV); break;
         case PIX_FMT_UYVY422  : c->chrToYV12 = RENAME(uyvyToUV); break;
@@ -2285,6 +2291,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break;
         default: break;
     }
+#endif /* !COMPILE_TEMPLATE_MMX2 */
     if (!c->chrSrcHSubSample) {
         switch(srcFormat) {
         case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV); break;
@@ -2294,6 +2301,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     }
 
     switch (srcFormat) {
+#if !COMPILE_TEMPLATE_MMX2
     case PIX_FMT_YUYV422  :
     case PIX_FMT_YUV420P16BE:
     case PIX_FMT_YUV422P16BE:
@@ -2305,14 +2313,17 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     case PIX_FMT_YUV422P16LE:
     case PIX_FMT_YUV444P16LE:
     case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break;
+#endif /* !COMPILE_TEMPLATE_MMX2 */
     case PIX_FMT_BGR24    : c->lumToYV12 = RENAME(bgr24ToY); break;
     case PIX_FMT_RGB24    : c->lumToYV12 = RENAME(rgb24ToY); break;
     default: break;
     }
+#if !COMPILE_TEMPLATE_MMX2
     if (c->alpPixBuf) {
         switch (srcFormat) {
         case PIX_FMT_Y400A  : c->alpToYV12 = RENAME(yuy2ToY); break;
         default: break;
         }
     }
+#endif /* !COMPILE_TEMPLATE_MMX2 */
 }

From e2bad983b510f0e963f38dab38397abbd346bc6c Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 26 May 2011 09:11:29 -0400
Subject: [PATCH 316/830] swscale: reformat x86/swscale_template.c.

Interleave macros and code so that it's easier to find the
actual code that belongs to a function. Also reindent where
appropriate and remove dead code.
---
 libswscale/x86/swscale_template.c | 1762 +++++++++++++++--------------
 1 file changed, 889 insertions(+), 873 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 4ee32fff7e..ff208cd777 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -73,6 +73,24 @@
         : "%"REG_a, "%"REG_d, "%"REG_S\
     );
 
+static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrSrc,
+                                    int chrFilterSize, const int16_t **alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                    uint8_t *aDest, long dstW, long chrDstW)
+{
+    if (uDest) {
+        YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
+        YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+    }
+    if (CONFIG_SWSCALE_ALPHA && aDest) {
+        YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
+    }
+
+    YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+}
+
 #define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
     __asm__ volatile(\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
@@ -135,6 +153,24 @@
         : "%"REG_a, "%"REG_d, "%"REG_S\
     );
 
+static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
+                                       const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       int chrFilterSize, const int16_t **alpSrc,
+                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                       uint8_t *aDest, long dstW, long chrDstW)
+{
+    if (uDest) {
+        YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
+        YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+    }
+    if (CONFIG_SWSCALE_ALPHA && aDest) {
+        YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
+    }
+
+    YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+}
+
 #define YSCALEYUV2YV121 \
     "mov %2, %%"REG_a"                    \n\t"\
     ".p2align               4             \n\t" /* FIXME Unroll? */\
@@ -148,6 +184,28 @@
     "add                   $8, %%"REG_a"  \n\t"\
     "jnc                   1b             \n\t"
 
+static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
+                                    const int16_t *chrSrc, const int16_t *alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                    uint8_t *aDest, long dstW, long chrDstW)
+{
+    long p= 4;
+    const uint8_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW };
+    uint8_t *dst[4]= { aDest, dest, uDest, vDest };
+    x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+
+    while (p--) {
+        if (dst[p]) {
+            __asm__ volatile(
+               YSCALEYUV2YV121
+               :: "r" (src[p]), "r" (dst[p] + counter[p]),
+                  "g" (-counter[p])
+               : "%"REG_a
+            );
+        }
+    }
+}
+
 #define YSCALEYUV2YV121_ACCURATE \
     "mov %2, %%"REG_a"                    \n\t"\
     "pcmpeqw %%mm7, %%mm7                 \n\t"\
@@ -166,13 +224,28 @@
     "add                   $8, %%"REG_a"  \n\t"\
     "jnc                   1b             \n\t"
 
-/*
-    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
-       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
-       "r" (dest), "m" (dstW_reg),
-       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
-    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
-*/
+static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
+                                       const int16_t *chrSrc, const int16_t *alpSrc,
+                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                       uint8_t *aDest, long dstW, long chrDstW)
+{
+    long p= 4;
+    const uint8_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW };
+    uint8_t *dst[4]= { aDest, dest, uDest, vDest };
+    x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+
+    while (p--) {
+        if (dst[p]) {
+            __asm__ volatile(
+                YSCALEYUV2YV121_ACCURATE
+                :: "r" (src[p]), "r" (dst[p] + counter[p]),
+                   "g" (-counter[p])
+                : "%"REG_a
+            );
+        }
+    }
+}
+
 #define YSCALEYUV2PACKEDX_UV \
     __asm__ volatile(\
         "xor                   %%"REG_a", %%"REG_a"     \n\t"\
@@ -362,263 +435,6 @@
     "packuswb        %%mm6, %%mm5       \n\t"\
     "packuswb        %%mm3, %%mm4       \n\t"\
 
-#define REAL_YSCALEYUV2PACKED(index, c) \
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
-    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
-    "psraw                $3, %%mm0                           \n\t"\
-    "psraw                $3, %%mm1                           \n\t"\
-    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
-    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
-    "xor            "#index", "#index"                        \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
-    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
-    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
-    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
-    "psraw                $7, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $7, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
-    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
-    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
-    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
-    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "psraw                $7, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $7, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-
-#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
-
-#define REAL_YSCALEYUV2RGB_UV(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
-    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
-    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
-    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
-    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-
-#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
-    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
-    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
-    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
-    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
-    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-
-#define REAL_YSCALEYUV2RGB_COEFF(c) \
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
-
-#define YSCALEYUV2RGB(index, c) \
-    REAL_YSCALEYUV2RGB_UV(index, c) \
-    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
-    REAL_YSCALEYUV2RGB_COEFF(c)
-
-#define REAL_YSCALEYUV2PACKED1(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "psraw                $7, %%mm3     \n\t" \
-    "psraw                $7, %%mm4     \n\t" \
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $7, %%mm1     \n\t" \
-    "psraw                $7, %%mm7     \n\t" \
-
-#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
-
-#define REAL_YSCALEYUV2RGB1(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
-
-#define REAL_YSCALEYUV2PACKED1b(index, c) \
-    "xor "#index", "#index"             \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
-    "psrlw                $8, %%mm3     \n\t" \
-    "psrlw                $8, %%mm4     \n\t" \
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $7, %%mm1     \n\t" \
-    "psraw                $7, %%mm7     \n\t"
-#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
-
-// do vertical chrominance interpolation
-#define REAL_YSCALEYUV2RGB1b(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
-    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
-    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
-
-#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
-    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
-    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
-    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
-    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
-    "packuswb          %%mm1, %%mm7     \n\t"
-#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
-
 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
     "movq       "#b", "#q2"     \n\t" /* B */\
     "movq       "#r", "#t"      \n\t" /* R */\
@@ -643,6 +459,64 @@
     " jb      1b                \n\t"
 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
 
+static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                          const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc,
+                                          int chrFilterSize, const int16_t **alpSrc,
+                                          uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0; /* not named again below; presumably a filler asm operand inside YSCALEYUV2PACKEDX_END */
+    x86_reg dstW_reg = dstW; /* register-sized copy of the width; presumably bound to operand %5 */
+    /* Vertical-filter YUV -> 32-bit BGR(A) writer using the *_ACCURATE macros; alpha is filtered from the alpha plane or forced to all ones. */
+    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+        YSCALEYUV2PACKEDX_ACCURATE
+        YSCALEYUV2RGBX
+        "movq                      %%mm2, "U_TEMP"(%0)  \n\t" /* park mm2 (the B argument in the no-alpha branch) */
+        "movq                      %%mm4, "V_TEMP"(%0)  \n\t" /* park mm4 (G) */
+        "movq                      %%mm5, "Y_TEMP"(%0)  \n\t" /* park mm5 (R) */
+        YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET) /* filter pass over the alpha coefficients; result presumably in mm1/mm7 */
+        "movq               "Y_TEMP"(%0), %%mm5         \n\t" /* bring R back into mm5 */
+        "psraw                        $3, %%mm1         \n\t"
+        "psraw                        $3, %%mm7         \n\t"
+        "packuswb                  %%mm7, %%mm1         \n\t" /* mm1 = both alpha word vectors saturated to bytes */
+        WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) /* NOTE(review): B/G come from mm3/mm4, so the _YA macro presumably reloads U_TEMP/V_TEMP - confirm */
+        YSCALEYUV2PACKEDX_END
+    } else {
+        YSCALEYUV2PACKEDX_ACCURATE
+        YSCALEYUV2RGBX
+        "pcmpeqd %%mm7, %%mm7 \n\t" /* mm7 = all ones, used as the alpha argument */
+        WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+        YSCALEYUV2PACKEDX_END
+    }
+}
+
+static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
+                                       const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       int chrFilterSize, const int16_t **alpSrc,
+                                       uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0; /* not named again below; presumably a filler asm operand inside YSCALEYUV2PACKEDX_END */
+    x86_reg dstW_reg = dstW; /* register-sized copy of the width; presumably bound to operand %5 */
+    /* Same output as yuv2rgb32_X_ar but built from the non-ACCURATE filter macros. */
+    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+        YSCALEYUV2PACKEDX
+        YSCALEYUV2RGBX
+        YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) /* alpha pass on explicitly named registers, none of which are mm2/mm4/mm5 (B/G/R) */
+        "psraw                        $3, %%mm1         \n\t"
+        "psraw                        $3, %%mm7         \n\t"
+        "packuswb                  %%mm7, %%mm1         \n\t" /* mm1 = both alpha word vectors saturated to bytes */
+        WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+        YSCALEYUV2PACKEDX_END
+    } else {
+        YSCALEYUV2PACKEDX
+        YSCALEYUV2RGBX
+        "pcmpeqd %%mm7, %%mm7 \n\t" /* mm7 = all ones, used as the alpha argument */
+        WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+        YSCALEYUV2PACKEDX_END
+    }
+}
+
 #define REAL_WRITERGB16(dst, dstw, index) \
     "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
     "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
@@ -671,6 +545,50 @@
     " jb             1b             \n\t"
 #define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
 
+static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                           const int16_t **lumSrc, int lumFilterSize,
+                                           const int16_t *chrFilter, const int16_t **chrSrc,
+                                           int chrFilterSize, const int16_t **alpSrc,
+                                           uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0; /* not named again below; presumably a filler asm operand inside YSCALEYUV2PACKEDX_END */
+    x86_reg dstW_reg = dstW; /* register-sized copy of the width; presumably bound to operand %5 */
+    /* Vertical-filter YUV -> RGB565 writer using the *_ACCURATE filter macro; alpSrc and dstY are not referenced in this body. */
+    YSCALEYUV2PACKEDX_ACCURATE
+    YSCALEYUV2RGBX
+    "pxor %%mm7, %%mm7 \n\t"
+    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP /* optional dithering: saturating add of per-channel dither bytes read relative to %0 */
+    "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
+    "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
+    "paddusb "RED_DITHER"(%0), %%mm5\n\t"
+#endif /* DITHER1XBPP */
+    WRITERGB16(%4, %5, %%REGa) /* masks B with bF8 and G with bFC before packing */
+    YSCALEYUV2PACKEDX_END
+}
+
+static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
+                                        const int16_t **lumSrc, int lumFilterSize,
+                                        const int16_t *chrFilter, const int16_t **chrSrc,
+                                        int chrFilterSize, const int16_t **alpSrc,
+                                        uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0; /* not named again below; presumably a filler asm operand inside YSCALEYUV2PACKEDX_END */
+    x86_reg dstW_reg = dstW; /* register-sized copy of the width; presumably bound to operand %5 */
+    /* Same output as yuv2rgb565_X_ar but built from the non-ACCURATE filter macro. */
+    YSCALEYUV2PACKEDX
+    YSCALEYUV2RGBX
+    "pxor %%mm7, %%mm7 \n\t"
+    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP /* optional dithering: saturating add of per-channel dither bytes read relative to %0 */
+    "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
+    "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
+    "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
+#endif /* DITHER1XBPP */
+    WRITERGB16(%4, %5, %%REGa) /* masks B with bF8 and G with bFC before packing */
+    YSCALEYUV2PACKEDX_END
+}
+
 #define REAL_WRITERGB15(dst, dstw, index) \
     "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
     "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
@@ -700,6 +618,50 @@
     " jb             1b             \n\t"
 #define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
 
+static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                           const int16_t **lumSrc, int lumFilterSize,
+                                           const int16_t *chrFilter, const int16_t **chrSrc,
+                                           int chrFilterSize, const int16_t **alpSrc,
+                                           uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0; /* not named again below; presumably a filler asm operand inside YSCALEYUV2PACKEDX_END */
+    x86_reg dstW_reg = dstW; /* register-sized copy of the width; presumably bound to operand %5 */
+    /* Vertical-filter YUV -> RGB555 writer using the *_ACCURATE filter macro; alpSrc and dstY are not referenced in this body. */
+    YSCALEYUV2PACKEDX_ACCURATE
+    YSCALEYUV2RGBX
+    "pxor %%mm7, %%mm7 \n\t"
+    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP /* optional dithering: saturating add of per-channel dither bytes read relative to %0 */
+    "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
+    "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
+    "paddusb "RED_DITHER"(%0), %%mm5\n\t"
+#endif /* DITHER1XBPP */
+    WRITERGB15(%4, %5, %%REGa) /* masks both B and G with bF8 before packing */
+    YSCALEYUV2PACKEDX_END
+}
+
+static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
+                                        const int16_t **lumSrc, int lumFilterSize,
+                                        const int16_t *chrFilter, const int16_t **chrSrc,
+                                        int chrFilterSize, const int16_t **alpSrc,
+                                        uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0; /* not named again below; presumably a filler asm operand inside YSCALEYUV2PACKEDX_END */
+    x86_reg dstW_reg = dstW; /* register-sized copy of the width; presumably bound to operand %5 */
+    /* Same output as yuv2rgb555_X_ar but built from the non-ACCURATE filter macro. */
+    YSCALEYUV2PACKEDX
+    YSCALEYUV2RGBX
+    "pxor %%mm7, %%mm7 \n\t"
+    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP /* optional dithering: saturating add of per-channel dither bytes read relative to %0 */
+    "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
+    "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
+    "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
+#endif /* DITHER1XBPP */
+    WRITERGB15(%4, %5, %%REGa) /* masks both B and G with bF8 before packing */
+    YSCALEYUV2PACKEDX_END
+}
+
 #define WRITEBGR24MMX(dst, dstw, index) \
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
     "movq      %%mm2, %%mm1     \n\t" /* B */\
@@ -809,6 +771,50 @@
 #define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
 #endif
 
+static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                          const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc,
+                                          int chrFilterSize, const int16_t **alpSrc,
+                                          uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0; /* filler bound to the unused "m" operands %1-%3 below */
+    x86_reg dstW_reg = dstW; /* register-sized copy of the width, passed as memory operand %5 */
+    /* Vertical-filter YUV -> 24-bit BGR writer using the *_ACCURATE filter macro; the operand and clobber lists are spelled out here instead of YSCALEYUV2PACKEDX_END. */
+    YSCALEYUV2PACKEDX_ACCURATE
+    YSCALEYUV2RGBX
+    "pxor %%mm7, %%mm7 \n\t"
+    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" // REG_c = REG_a * 3; FIXME optimize
+    "add %4, %%"REG_c"                        \n\t" /* REG_c += dest, giving the output address handed to WRITEBGR24 */
+    WRITEBGR24(%%REGc, %5, %%REGa)
+    :: "r" (&c->redDither), /* %0 */
+       "m" (dummy), "m" (dummy), "m" (dummy), /* %1-%3: placeholders that keep dest/dstW at %4/%5 */
+       "r" (dest), "m" (dstW_reg) /* %4, %5 */
+    : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+    );
+}
+
+static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
+                                       const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       int chrFilterSize, const int16_t **alpSrc,
+                                       uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0; /* filler bound to the unused "m" operands %1-%3 below */
+    x86_reg dstW_reg = dstW; /* register-sized copy of the width, passed as memory operand %5 */
+    /* Same output as yuv2bgr24_X_ar but built from the non-ACCURATE filter macro. */
+    YSCALEYUV2PACKEDX
+    YSCALEYUV2RGBX
+    "pxor                    %%mm7, %%mm7       \n\t"
+    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" // REG_c = REG_a * 3; FIXME optimize
+    "add                        %4, %%"REG_c"   \n\t" /* REG_c += dest, giving the output address handed to WRITEBGR24 */
+    WRITEBGR24(%%REGc, %5, %%REGa)
+    :: "r" (&c->redDither), /* %0 */
+       "m" (dummy), "m" (dummy), "m" (dummy), /* %1-%3: placeholders that keep dest/dstW at %4/%5 */
+       "r" (dest),  "m" (dstW_reg) /* %4, %5 */
+    : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+    );
+}
+
 #define REAL_WRITEYUY2(dst, dstw, index) \
     "packuswb  %%mm3, %%mm3     \n\t"\
     "packuswb  %%mm4, %%mm4     \n\t"\
@@ -826,688 +832,698 @@
     " jb          1b            \n\t"
 #define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
 
-
-static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
-{
-            if (uDest) {
-                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
-            }
-            if (CONFIG_SWSCALE_ALPHA && aDest) {
-                YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
-            }
-
-            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-}
-
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
-{
-            if (uDest) {
-                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
-            }
-            if (CONFIG_SWSCALE_ALPHA && aDest) {
-                YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
-            }
-
-            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-}
-
-static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
-{
-        long p= 4;
-        const uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
-        uint8_t *dst[4]= {aDest, dest, uDest, vDest};
-        x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
-
-            while(p--) {
-                if (dst[p]) {
-                    __asm__ volatile(
-                        YSCALEYUV2YV121_ACCURATE
-                        :: "r" (src[p]), "r" (dst[p] + counter[p]),
-                        "g" (-counter[p])
-                        : "%"REG_a
-                    );
-                }
-            }
-}
-
-static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
-{
-    long p= 4;
-    const uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
-    uint8_t *dst[4]= {aDest, dest, uDest, vDest};
-    x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
-
-            while(p--) {
-                if (dst[p]) {
-                    __asm__ volatile(
-                        YSCALEYUV2YV121
-                        :: "r" (src[p]), "r" (dst[p] + counter[p]),
-                        "g" (-counter[p])
-                        : "%"REG_a
-                    );
-                }
-            }
-}
-
-
-/**
- * vertical scale YV12 to RGB
- */
-static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                            const int16_t **lumSrc, int lumFilterSize,
+                                            const int16_t *chrFilter, const int16_t **chrSrc,
+                                            int chrFilterSize, const int16_t **alpSrc,
+                                            uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
 
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    YSCALEYUV2PACKEDX_ACCURATE
-                    YSCALEYUV2RGBX
-                    "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
-                    "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
-                    "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
-                    YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
-                    "movq               "Y_TEMP"(%0), %%mm5         \n\t"
-                    "psraw                        $3, %%mm1         \n\t"
-                    "psraw                        $3, %%mm7         \n\t"
-                    "packuswb                  %%mm7, %%mm1         \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
-
-                    YSCALEYUV2PACKEDX_END
-                } else {
-                    YSCALEYUV2PACKEDX_ACCURATE
-                    YSCALEYUV2RGBX
-                    "pcmpeqd %%mm7, %%mm7 \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-
-                    YSCALEYUV2PACKEDX_END
-                }
+    YSCALEYUV2PACKEDX_ACCURATE
+    /* no RGB conversion on this path: presumably mm1=Y1, %%mm3=U, %%mm4=V, %%mm7=Y2 */
+    "psraw $3, %%mm3    \n\t"
+    "psraw $3, %%mm4    \n\t"
+    "psraw $3, %%mm1    \n\t"
+    "psraw $3, %%mm7    \n\t"
+    WRITEYUY2(%4, %5, %%REGa)
+    YSCALEYUV2PACKEDX_END
 }
 
-static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
+                                         const int16_t **lumSrc, int lumFilterSize,
+                                         const int16_t *chrFilter, const int16_t **chrSrc,
+                                         int chrFilterSize, const int16_t **alpSrc,
+                                         uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
 
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
-                "add %4, %%"REG_c"                        \n\t"
-                WRITEBGR24(%%REGc, %5, %%REGa)
-
-
-                :: "r" (&c->redDither),
-                "m" (dummy), "m" (dummy), "m" (dummy),
-                "r" (dest), "m" (dstW_reg)
-                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
-                );
+    YSCALEYUV2PACKEDX
+    /* no RGB conversion on this path: presumably mm1=Y1, %%mm3=U, %%mm4=V, %%mm7=Y2 */
+    "psraw $3, %%mm3    \n\t"
+    "psraw $3, %%mm4    \n\t"
+    "psraw $3, %%mm1    \n\t"
+    "psraw $3, %%mm7    \n\t"
+    WRITEYUY2(%4, %5, %%REGa)
+    YSCALEYUV2PACKEDX_END
 }
 
+#define REAL_YSCALEYUV2RGB_UV(index, c) \
+    "xor            "#index", "#index"  \n\t" /* index = 0 */\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t" /* loop head; the closing branch is not part of this macro */\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[index]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[index]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[index+VOF]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[index+VOF]*/\
+    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[index] - uvbuf1[index]*/\
+    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[index+VOF] - uvbuf1[index+VOF]*/\
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t" /* blend weight, called uvalpha1 below */\
+    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[index] - uvbuf1[index])uvalpha1>>16*/\
+    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[index+VOF] - uvbuf1[index+VOF])uvalpha1>>16*/\
+    "psraw                $4, %%mm3     \n\t" /* uvbuf1[index] >>4*/\
+    "psraw                $4, %%mm4     \n\t" /* uvbuf1[index+VOF] >>4*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* (uvbuf1[index]>>4) + weighted difference = blended U*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* (uvbuf1[index+VOF]>>4) + weighted difference = blended V*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
 
-static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
+#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
+    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
+    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
+    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
+    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
+    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
 
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
-#endif
+#define REAL_YSCALEYUV2RGB_COEFF(c) \
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t"\
+    "movq              %%mm2, %%mm0     \n\t"\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t"\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t"\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t"\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t"\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb          %%mm0, %%mm2     \n\t"\
+    "packuswb          %%mm6, %%mm5     \n\t"\
+    "packuswb          %%mm3, %%mm4     \n\t"\
 
-                WRITERGB15(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
+#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 
-static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
-#endif
-
-                WRITERGB16(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
-
-static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX_ACCURATE
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-
-                "psraw $3, %%mm3    \n\t"
-                "psraw $3, %%mm4    \n\t"
-                "psraw $3, %%mm1    \n\t"
-                "psraw $3, %%mm7    \n\t"
-                WRITEYUY2(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
-
-static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    YSCALEYUV2PACKEDX
-                    YSCALEYUV2RGBX
-                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
-                    "psraw                        $3, %%mm1         \n\t"
-                    "psraw                        $3, %%mm7         \n\t"
-                    "packuswb                  %%mm7, %%mm1         \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-                    YSCALEYUV2PACKEDX_END
-                } else {
-                    YSCALEYUV2PACKEDX
-                    YSCALEYUV2RGBX
-                    "pcmpeqd %%mm7, %%mm7 \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                    YSCALEYUV2PACKEDX_END
-                }
-}
-
-static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor                    %%mm7, %%mm7       \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
-                "add                        %4, %%"REG_c"   \n\t"
-                WRITEBGR24(%%REGc, %5, %%REGa)
-
-                :: "r" (&c->redDither),
-                "m" (dummy), "m" (dummy), "m" (dummy),
-                "r" (dest),  "m" (dstW_reg)
-                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
-                );
-}
-
-static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
-#endif
-
-                WRITERGB15(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
-
-static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
-#endif
-
-                WRITERGB16(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
-
-static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-
-                "psraw $3, %%mm3    \n\t"
-                "psraw $3, %%mm4    \n\t"
-                "psraw $3, %%mm1    \n\t"
-                "psraw $3, %%mm7    \n\t"
-                WRITEYUY2(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
+#define YSCALEYUV2RGB(index, c) \
+    REAL_YSCALEYUV2RGB_UV(index, c) \
+    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
+    REAL_YSCALEYUV2RGB_COEFF(c)
 
 /**
  * vertical bilinear scale YV12 to RGB
  */
-static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                          const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
+                                       const uint16_t *buf1, const uint16_t *uvbuf0,
+                                       const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                       const uint16_t *abuf1, uint8_t *dest,
+                                       int dstW, int yalpha, int uvalpha, int y)
 {
-            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
-                __asm__ volatile(
-                    YSCALEYUV2RGB(%%r8, %5)
-                    YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
-                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "packuswb            %%mm7, %%mm1       \n\t"
-                    WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
-                    "a" (&c->redDither)
-                    ,"r" (abuf0), "r" (abuf1)
-                    : "%r8"
-                );
+        __asm__ volatile(
+            YSCALEYUV2RGB(%%r8, %5)
+            YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
+            "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
+            "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
+            "packuswb            %%mm7, %%mm1       \n\t"
+            WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
+               "a" (&c->redDither),
+               "r" (abuf0), "r" (abuf1)
+            : "%r8"
+        );
 #else
-                *(const uint16_t **)(&c->u_temp)=abuf0;
-                *(const uint16_t **)(&c->v_temp)=abuf1;
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB(%%REGBP, %5)
-                    "push                   %0              \n\t"
-                    "push                   %1              \n\t"
-                    "mov          "U_TEMP"(%5), %0          \n\t"
-                    "mov          "V_TEMP"(%5), %1          \n\t"
-                    YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
-                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "packuswb            %%mm7, %%mm1       \n\t"
-                    "pop                    %1              \n\t"
-                    "pop                    %0              \n\t"
-                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
+        *(const uint16_t **)(&c->u_temp)=abuf0;
+        *(const uint16_t **)(&c->v_temp)=abuf1;
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2RGB(%%REGBP, %5)
+            "push                   %0              \n\t"
+            "push                   %1              \n\t"
+            "mov          "U_TEMP"(%5), %0          \n\t"
+            "mov          "V_TEMP"(%5), %1          \n\t"
+            YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
+            "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
+            "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
+            "packuswb            %%mm7, %%mm1       \n\t"
+            "pop                    %1              \n\t"
+            "pop                    %0              \n\t"
+            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
 #endif
-            } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB(%%REGBP, %5)
-                    "pcmpeqd %%mm7, %%mm7                   \n\t"
-                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-            }
+    } else {
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2RGB(%%REGBP, %5)
+            "pcmpeqd %%mm7, %%mm7                   \n\t"
+            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    }
 }
 
-static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
+                                       const uint16_t *buf1, const uint16_t *uvbuf0,
+                                       const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                       const uint16_t *abuf1, uint8_t *dest,
+                                       int dstW, int yalpha, int uvalpha, int y)
 {
-            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
+    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
+    __asm__ volatile(
+        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+        "mov        %4, %%"REG_b"               \n\t"
+        "push %%"REG_BP"                        \n\t"
+        YSCALEYUV2RGB(%%REGBP, %5)
+        "pxor    %%mm7, %%mm7                   \n\t"
+        WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+        "pop %%"REG_BP"                         \n\t"
+        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+           "a" (&c->redDither)
+    );
 }
 
-static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
+                                        const uint16_t *buf1, const uint16_t *uvbuf0,
+                                        const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                        const uint16_t *abuf1, uint8_t *dest,
+                                        int dstW, int yalpha, int uvalpha, int y)
 {
-            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
+    __asm__ volatile(
+        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+        "mov        %4, %%"REG_b"               \n\t"
+        "push %%"REG_BP"                        \n\t"
+        YSCALEYUV2RGB(%%REGBP, %5)
+        "pxor    %%mm7, %%mm7                   \n\t"
+        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+        "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+        "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
 #endif
-
-                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
+        WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+        "pop %%"REG_BP"                         \n\t"
+        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+           "a" (&c->redDither)
+    );
 }
 
-static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
+                                        const uint16_t *buf1, const uint16_t *uvbuf0,
+                                        const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                        const uint16_t *abuf1, uint8_t *dest,
+                                        int dstW, int yalpha, int uvalpha, int y)
 {
-            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
+    __asm__ volatile(
+        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+        "mov        %4, %%"REG_b"               \n\t"
+        "push %%"REG_BP"                        \n\t"
+        YSCALEYUV2RGB(%%REGBP, %5)
+        "pxor    %%mm7, %%mm7                   \n\t"
+        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+        "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+        "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
 #endif
-
-                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
+        WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+        "pop %%"REG_BP"                         \n\t"
+        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+           "a" (&c->redDither)
+    );
 }
 
-static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+#define REAL_YSCALEYUV2PACKED(index, c) \
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
+    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
+    "psraw                $3, %%mm0                           \n\t"\
+    "psraw                $3, %%mm1                           \n\t"\
+    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
+    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
+    "xor            "#index", "#index"                        \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
+    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
+    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
+    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
+    "psraw                $7, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
+    "psraw                $7, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
+    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
+    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
+    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
+    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "psraw                $7, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "psraw                $7, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+
+#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
+
+static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
+                                         const uint16_t *buf1, const uint16_t *uvbuf0,
+                                         const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                         const uint16_t *abuf1, uint8_t *dest,
+                                         int dstW, int yalpha, int uvalpha, int y)
 {
-            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov %4, %%"REG_b"                        \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2PACKED(%%REGBP, %5)
-                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
+    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
+    __asm__ volatile(
+        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+        "mov %4, %%"REG_b"                        \n\t"
+        "push %%"REG_BP"                        \n\t"
+        YSCALEYUV2PACKED(%%REGBP, %5)
+        WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+        "pop %%"REG_BP"                         \n\t"
+        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+           "a" (&c->redDither)
+    );
 }
 
+#define REAL_YSCALEYUV2RGB1(index, c) \
+    "xor            "#index", "#index"  \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
+    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
+    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t"\
+    "movq              %%mm2, %%mm0     \n\t"\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t"\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t"\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t"\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t"\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb          %%mm0, %%mm2     \n\t"\
+    "packuswb          %%mm6, %%mm5     \n\t"\
+    "packuswb          %%mm3, %%mm4     \n\t"\
+
+#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
+
+// do vertical chrominance interpolation: chroma is the sum of the uvbuf0 (%2) and uvbuf1 (%3) lines, luma is read from buf0 (%0) only
+#define REAL_YSCALEYUV2RGB1b(index, c) \
+    "xor            "#index", "#index"  \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* 4 U words at uvbuf0 + index bytes */\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* 4 U words at uvbuf1 + index bytes */\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* 4 V words, VOF bytes past the U words of uvbuf0 */\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* 4 V words, VOF bytes past the U words of uvbuf1 */\
+    "paddw             %%mm2, %%mm3     \n\t" /* U of uvbuf0 + U of uvbuf1 */\
+    "paddw             %%mm5, %%mm4     \n\t" /* V of uvbuf0 + V of uvbuf1 */\
+    "psrlw                $5, %%mm3     \n\t" /* (U sum) >>5; FIXME might overflow */\
+    "psrlw                $5, %%mm4     \n\t" /* (V sum) >>5; FIXME might overflow */\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index  ]     */\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4]     */\
+    "psraw                $4, %%mm1     \n\t" /* buf0[index  ] >>4 */\
+    "psraw                $4, %%mm7     \n\t" /* buf0[index+4] >>4 */\
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t"\
+    "movq              %%mm2, %%mm0     \n\t"\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t"\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t"\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t"\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t"\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 (1 = added to Y1, 2 = added to Y2) */\
+    "packuswb          %%mm0, %%mm2     \n\t"\
+    "packuswb          %%mm6, %%mm5     \n\t"\
+    "packuswb          %%mm3, %%mm4     \n\t"\
+
+#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
+
+#define REAL_YSCALEYUV2RGB1_ALPHA(index) /* mm7 = 8 alpha bytes: the words abuf0[index..index+7] (operand %1) >>7, packed with unsigned saturation; clobbers mm1 */ \
+    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
+    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
+    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
+    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
+    "packuswb          %%mm1, %%mm7     \n\t"
+#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
+
 /**
  * YV12 to RGB without scaling or interpolating
  */
-static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                          const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
+                                       const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest,
+                                       int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                       int flags, int y)
 {
-        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const uint16_t *buf1= buf0; // FIXME: alias of buf0, only passed as asm operand %1 ("d") in the non-alpha branches; needed for RGB1/BGR1
 
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1(%%REGBP, %5)
-                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                } else {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1(%%REGBP, %5)
-                        "pcmpeqd %%mm7, %%mm7                   \n\t"
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                }
+    if (uvalpha < 2048) { // uses YSCALEYUV2RGB1 instead of the chroma-summing YSCALEYUV2RGB1b; note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b at ESP_OFFSET(&c->redDither) */
+                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+                "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index below */
+                YSCALEYUV2RGB1(%%REGBP, %5)
+                YSCALEYUV2RGB1_ALPHA(%%REGBP)
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t" /* restore REG_BP */
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+                :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                   "a" (&c->redDither)
+            );
         } else {
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1b(%%REGBP, %5)
-                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                } else {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1b(%%REGBP, %5)
-                        "pcmpeqd %%mm7, %%mm7                   \n\t"
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                }
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1(%%REGBP, %5)
+                "pcmpeqd %%mm7, %%mm7                   \n\t" /* mm7 = all ones, used where the alpha branch passes the packed alpha bytes */
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                   "a" (&c->redDither)
+            );
         }
+    } else {
+        if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1b(%%REGBP, %5)
+                YSCALEYUV2RGB1_ALPHA(%%REGBP)
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                   "a" (&c->redDither)
+            );
+        } else {
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1b(%%REGBP, %5)
+                "pcmpeqd %%mm7, %%mm7                   \n\t"
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                   "a" (&c->redDither)
+            );
+        }
+    }
 }
 
-static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
+                                       const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest,
+                                       int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                       int flags, int y)
 {
-        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const uint16_t *buf1= buf0; // FIXME: alias of buf0, only passed as asm operand %1 ("d"); needed for RGB1/BGR1
 
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        }
+    if (uvalpha < 2048) { // uses YSCALEYUV2RGB1 instead of the chroma-summing YSCALEYUV2RGB1b; note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b at ESP_OFFSET(&c->redDither) */
+            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index below */
+            YSCALEYUV2RGB1(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
+            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t" /* restore REG_BP */
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    } else {
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2RGB1b(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t"
+            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    }
 }
 
-static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
+                                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                        const uint16_t *abuf0, uint8_t *dest,
+                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                        int flags, int y)
 {
-        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const uint16_t *buf1= buf0; // FIXME: alias of buf0, only passed as asm operand %1 ("d"); needed for RGB1/BGR1
 
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+    if (uvalpha < 2048) { // uses YSCALEYUV2RGB1 instead of the chroma-summing YSCALEYUV2RGB1b; note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b at ESP_OFFSET(&c->redDither) */
+            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index below */
+            YSCALEYUV2RGB1(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t"
+            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" /* add the per-channel dither bytes with unsigned saturation */
+            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
 #endif
-                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+            WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t" /* restore REG_BP */
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    } else {
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2RGB1b(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t"
+            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
 #endif
-                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        }
+            WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    }
 }
 
-static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
+                                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                        const uint16_t *abuf0, uint8_t *dest,
+                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                        int flags, int y)
 {
-        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const uint16_t *buf1= buf0; // FIXME: alias of buf0, only passed as asm operand %1 ("d"); needed for RGB1/BGR1
 
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+    if (uvalpha < 2048) { // uses YSCALEYUV2RGB1 instead of the chroma-summing YSCALEYUV2RGB1b; note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b at ESP_OFFSET(&c->redDither) */
+            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index below */
+            YSCALEYUV2RGB1(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t"
+            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" /* add the per-channel dither bytes with unsigned saturation */
+            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
 #endif
-
-                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+            WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t" /* restore REG_BP */
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    } else {
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2RGB1b(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t"
+            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
 #endif
-
-                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        }
+            WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    }
 }
 
-static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+#define REAL_YSCALEYUV2PACKED1(index, c) /* zeroes index, defines label 1:, then loads mm3/mm4 from uvbuf0 (%2) and mm1/mm7 from buf0 (%0), each >>7; c is unused */ \
+    "xor            "#index", "#index"  \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm3     \n\t" /* 4 words at uvbuf0 + index bytes */\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* 4 words at uvbuf0 + index + VOF bytes */\
+    "psraw                $7, %%mm3     \n\t" \
+    "psraw                $7, %%mm4     \n\t" \
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index  ] */\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4] */\
+    "psraw                $7, %%mm1     \n\t" \
+    "psraw                $7, %%mm7     \n\t" \
+
+#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
+
+#define REAL_YSCALEYUV2PACKED1b(index, c) /* like REAL_YSCALEYUV2PACKED1, but mm3/mm4 hold (uvbuf0 + uvbuf1) >>8 instead of uvbuf0 >>7; c is unused */ \
+    "xor "#index", "#index"             \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* 4 words at uvbuf0 + index bytes */\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* 4 words at uvbuf1 + index bytes */\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* 4 words at uvbuf0 + index + VOF bytes */\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* 4 words at uvbuf1 + index + VOF bytes */\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0 words + uvbuf1 words */\
+    "paddw             %%mm5, %%mm4     \n\t" /* same for the words VOF bytes further */\
+    "psrlw                $8, %%mm3     \n\t" \
+    "psrlw                $8, %%mm4     \n\t" \
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index  ] */\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4] */\
+    "psraw                $7, %%mm1     \n\t" \
+    "psraw                $7, %%mm7     \n\t"
+#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
+
+static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
+                                         const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                         const uint16_t *abuf0, uint8_t *dest,
+                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                         int flags, int y)
 {
-        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const uint16_t *buf1= buf0; // FIXME: alias of buf0, only passed as asm operand %1 ("d"); needed for RGB1/BGR1
 
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2PACKED1(%%REGBP, %5)
-                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2PACKED1b(%%REGBP, %5)
-                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        }
+    if (uvalpha < 2048) { // YSCALEYUV2PACKED1 reads uvbuf0 only (PACKED1b sums uvbuf0 and uvbuf1); note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* save REG_b at ESP_OFFSET(&c->redDither) */
+            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+            "push %%"REG_BP"                        \n\t" /* REG_BP serves as the loop index below */
+            YSCALEYUV2PACKED1(%%REGBP, %5)
+            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t" /* restore REG_BP */
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    } else {
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2PACKED1b(%%REGBP, %5)
+            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    }
 }
 
 #if !COMPILE_TEMPLATE_MMX2

From 836b82e3c9fd4ac2b5240e143fbd9ef6118cc12b Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 17:50:03 -0400
Subject: [PATCH 317/830] swscale: reindent h[cy]scale_fast() and
 updateDitherTables().

---
 libswscale/x86/swscale_template.c | 261 +++++++++++++++---------------
 1 file changed, 131 insertions(+), 130 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index ff208cd777..06cc3eef5c 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2014,64 +2014,64 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
     DECLARE_ALIGNED(8, uint64_t, ebxsave);
 #endif
 
-        __asm__ volatile(
+    __asm__ volatile(
 #if defined(PIC)
-            "mov               %%"REG_b", %5        \n\t"
+        "mov               %%"REG_b", %5        \n\t"
 #endif
-            "pxor                  %%mm7, %%mm7     \n\t"
-            "mov                      %0, %%"REG_c" \n\t"
-            "mov                      %1, %%"REG_D" \n\t"
-            "mov                      %2, %%"REG_d" \n\t"
-            "mov                      %3, %%"REG_b" \n\t"
-            "xor               %%"REG_a", %%"REG_a" \n\t" // i
-            PREFETCH"        (%%"REG_c")            \n\t"
-            PREFETCH"      32(%%"REG_c")            \n\t"
-            PREFETCH"      64(%%"REG_c")            \n\t"
+        "pxor                  %%mm7, %%mm7     \n\t"
+        "mov                      %0, %%"REG_c" \n\t"
+        "mov                      %1, %%"REG_D" \n\t"
+        "mov                      %2, %%"REG_d" \n\t"
+        "mov                      %3, %%"REG_b" \n\t"
+        "xor               %%"REG_a", %%"REG_a" \n\t" // i
+        PREFETCH"        (%%"REG_c")            \n\t"
+        PREFETCH"      32(%%"REG_c")            \n\t"
+        PREFETCH"      64(%%"REG_c")            \n\t"
 
 #if ARCH_X86_64
-
 #define CALL_MMX2_FILTER_CODE \
-            "movl            (%%"REG_b"), %%esi     \n\t"\
-            "call                    *%4            \n\t"\
-            "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\
-            "add               %%"REG_S", %%"REG_c" \n\t"\
-            "add               %%"REG_a", %%"REG_D" \n\t"\
-            "xor               %%"REG_a", %%"REG_a" \n\t"\
+        "movl            (%%"REG_b"), %%esi     \n\t"\
+        "call                    *%4            \n\t"\
+        "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\
+        "add               %%"REG_S", %%"REG_c" \n\t"\
+        "add               %%"REG_a", %%"REG_D" \n\t"\
+        "xor               %%"REG_a", %%"REG_a" \n\t"\
 
 #else
-
 #define CALL_MMX2_FILTER_CODE \
-            "movl (%%"REG_b"), %%esi        \n\t"\
-            "call         *%4                       \n\t"\
-            "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
-            "add               %%"REG_a", %%"REG_D" \n\t"\
-            "xor               %%"REG_a", %%"REG_a" \n\t"\
+        "movl (%%"REG_b"), %%esi        \n\t"\
+        "call         *%4                       \n\t"\
+        "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
+        "add               %%"REG_a", %%"REG_D" \n\t"\
+        "xor               %%"REG_a", %%"REG_a" \n\t"\
 
 #endif /* ARCH_X86_64 */
 
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
 
 #if defined(PIC)
-            "mov                      %5, %%"REG_b" \n\t"
+        "mov                      %5, %%"REG_b" \n\t"
 #endif
-            :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
-            "m" (mmx2FilterCode)
+        :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
+           "m" (mmx2FilterCode)
 #if defined(PIC)
-            ,"m" (ebxsave)
+          ,"m" (ebxsave)
 #endif
-            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+        : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
 #if !defined(PIC)
-            ,"%"REG_b
+         ,"%"REG_b
 #endif
-        );
-        for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
+    );
+
+    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+        dst[i] = src[srcW-1]*128;
 }
 
 static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
@@ -2087,54 +2087,55 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
     DECLARE_ALIGNED(8, uint64_t, ebxsave);
 #endif
 
-        __asm__ volatile(
+    __asm__ volatile(
 #if defined(PIC)
-            "mov          %%"REG_b", %6         \n\t"
+        "mov          %%"REG_b", %6         \n\t"
 #endif
-            "pxor             %%mm7, %%mm7      \n\t"
-            "mov                 %0, %%"REG_c"  \n\t"
-            "mov                 %1, %%"REG_D"  \n\t"
-            "mov                 %2, %%"REG_d"  \n\t"
-            "mov                 %3, %%"REG_b"  \n\t"
-            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-            PREFETCH"   (%%"REG_c")             \n\t"
-            PREFETCH" 32(%%"REG_c")             \n\t"
-            PREFETCH" 64(%%"REG_c")             \n\t"
+        "pxor             %%mm7, %%mm7      \n\t"
+        "mov                 %0, %%"REG_c"  \n\t"
+        "mov                 %1, %%"REG_D"  \n\t"
+        "mov                 %2, %%"REG_d"  \n\t"
+        "mov                 %3, %%"REG_b"  \n\t"
+        "xor          %%"REG_a", %%"REG_a"  \n\t" // i
+        PREFETCH"   (%%"REG_c")             \n\t"
+        PREFETCH" 32(%%"REG_c")             \n\t"
+        PREFETCH" 64(%%"REG_c")             \n\t"
 
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-            "mov                 %5, %%"REG_c"  \n\t" // src
-            "mov                 %1, %%"REG_D"  \n\t" // buf1
-            "add              $"AV_STRINGIFY(VOF)", %%"REG_D"  \n\t"
-            PREFETCH"   (%%"REG_c")             \n\t"
-            PREFETCH" 32(%%"REG_c")             \n\t"
-            PREFETCH" 64(%%"REG_c")             \n\t"
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        "xor          %%"REG_a", %%"REG_a"  \n\t" // i
+        "mov                 %5, %%"REG_c"  \n\t" // src
+        "mov                 %1, %%"REG_D"  \n\t" // buf1
+        "add              $"AV_STRINGIFY(VOF)", %%"REG_D"  \n\t"
+        PREFETCH"   (%%"REG_c")             \n\t"
+        PREFETCH" 32(%%"REG_c")             \n\t"
+        PREFETCH" 64(%%"REG_c")             \n\t"
 
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
 
 #if defined(PIC)
-            "mov %6, %%"REG_b"    \n\t"
+        "mov %6, %%"REG_b"    \n\t"
 #endif
-            :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
-            "m" (mmx2FilterCode), "m" (src2)
+        :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
+           "m" (mmx2FilterCode), "m" (src2)
 #if defined(PIC)
-            ,"m" (ebxsave)
+          ,"m" (ebxsave)
 #endif
-            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+        : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
 #if !defined(PIC)
-            ,"%"REG_b
+         ,"%"REG_b
 #endif
-        );
-        for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
-            dst[i] = src1[srcW-1]*128;
-            dst[i+VOFW] = src2[srcW-1]*128;
-        }
+    );
+
+    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+        dst[i] = src1[srcW-1]*128;
+        dst[i+VOFW] = src2[srcW-1]*128;
+    }
 }
 #endif /* COMPILE_TEMPLATE_MMX2 */
 
@@ -2162,62 +2163,62 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
     const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
     const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
 
-        c->blueDither= ff_dither8[dstY&1];
-        if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
-            c->greenDither= ff_dither8[dstY&1];
-        else
-            c->greenDither= ff_dither4[dstY&1];
-        c->redDither= ff_dither8[(dstY+1)&1];
-        if (dstY < dstH - 2) {
-            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            int i;
-            if (flags & SWS_ACCURATE_RND) {
-                int s= APCK_SIZE / 8;
-                for (i=0; i<vLumFilterSize; i+=2) {
-                    *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
-                    *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
-                              lumMmxFilter[s*i+APCK_COEF/4  ]=
-                              lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
-                        + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
-                    if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                        *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
-                        *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
-                                  alpMmxFilter[s*i+APCK_COEF/4  ]=
-                                  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
-                    }
-                }
-                for (i=0; i<vChrFilterSize; i+=2) {
-                    *(const void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
-                    *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
-                              chrMmxFilter[s*i+APCK_COEF/4  ]=
-                              chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
-                        + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
-                }
-            } else {
-                for (i=0; i<vLumFilterSize; i++) {
-                    lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
-                    lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
-                    lumMmxFilter[4*i+2]=
-                    lumMmxFilter[4*i+3]=
-                        ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
-                    if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                        alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
-                        alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
-                        alpMmxFilter[4*i+2]=
-                        alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
-                    }
-                }
-                for (i=0; i<vChrFilterSize; i++) {
-                    chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
-                    chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
-                    chrMmxFilter[4*i+2]=
-                    chrMmxFilter[4*i+3]=
-                        ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
+    c->blueDither= ff_dither8[dstY&1];
+    if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
+        c->greenDither= ff_dither8[dstY&1];
+    else
+        c->greenDither= ff_dither4[dstY&1];
+    c->redDither= ff_dither8[(dstY+1)&1];
+    if (dstY < dstH - 2) {
+        const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+        const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+        const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+        int i;
+        if (flags & SWS_ACCURATE_RND) {
+            int s= APCK_SIZE / 8;
+            for (i=0; i<vLumFilterSize; i+=2) {
+                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
+                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
+                lumMmxFilter[s*i+APCK_COEF/4  ]=
+                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
+                           + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
+                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
+                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
+                    alpMmxFilter[s*i+APCK_COEF/4  ]=
+                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
                 }
             }
+            for (i=0; i<vChrFilterSize; i+=2) {
+                *(const void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
+                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
+                chrMmxFilter[s*i+APCK_COEF/4  ]=
+                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
+                           + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
+            }
+        } else {
+            for (i=0; i<vLumFilterSize; i++) {
+                lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
+                lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
+                lumMmxFilter[4*i+2]=
+                lumMmxFilter[4*i+3]=
+                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
+                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+                    alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
+                    alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
+                    alpMmxFilter[4*i+2]=
+                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
+                }
+            }
+            for (i=0; i<vChrFilterSize; i++) {
+                chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
+                chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
+                chrMmxFilter[4*i+2]=
+                chrMmxFilter[4*i+3]=
+                    ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
+            }
         }
+    }
 }
 #endif /* !COMPILE_TEMPLATE_MMX2 */
 

From 522d65ba259a263d0cd91db27b79c07e13d7fcf2 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 18:28:40 -0400
Subject: [PATCH 318/830] rgb2rgb: remove duplicate mmx/mmx2/3dnow/sse2
 functions.

Many functions have such a prefix, but do not actually use any
instructions or features from that set, thus giving the false
impression that swscale is highly optimized for a particular
system, whereas in reality it is not.
---
 libswscale/x86/rgb2rgb.c          |  2 +-
 libswscale/x86/rgb2rgb_template.c | 74 +++++++++++++++++--------------
 2 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index cf901affe6..97c50dd636 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -111,7 +111,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
 #undef COMPILE_TEMPLATE_SSE2
 #undef COMPILE_TEMPLATE_AMD3DNOW
 #define COMPILE_TEMPLATE_MMX2 0
-#define COMPILE_TEMPLATE_SSE2 1
+#define COMPILE_TEMPLATE_SSE2 0
 #define COMPILE_TEMPLATE_AMD3DNOW 1
 #define RENAME(a) a ## _3DNOW
 #include "rgb2rgb_template.c"
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index ce635dfde4..70673f75d1 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -30,15 +30,8 @@
 #undef MOVNTQ
 #undef EMMS
 #undef SFENCE
-#undef MMREG_SIZE
 #undef PAVGB
 
-#if COMPILE_TEMPLATE_SSE2
-#define MMREG_SIZE 16
-#else
-#define MMREG_SIZE 8
-#endif
-
 #if COMPILE_TEMPLATE_AMD3DNOW
 #define PREFETCH  "prefetch"
 #define PAVGB     "pavgusb"
@@ -64,6 +57,10 @@
 #define SFENCE " # nop"
 #endif
 
+#if !COMPILE_TEMPLATE_SSE2
+
+#if !COMPILE_TEMPLATE_AMD3DNOW
+
 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
 {
     uint8_t *dest = dst;
@@ -1513,7 +1510,9 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
                      SFENCE"     \n\t"
                      :::"memory");
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
 {
     long x,y;
@@ -1530,7 +1529,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
     dst+= dstStride;
 
     for (y=1; y<srcHeight; y++) {
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
         const x86_reg mmxSize= srcWidth&~15;
         __asm__ volatile(
             "mov           %4, %%"REG_a"            \n\t"
@@ -1564,17 +1562,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
             "punpckhbw              %%mm3, %%mm7    \n\t"
             "punpcklbw              %%mm2, %%mm4    \n\t"
             "punpckhbw              %%mm2, %%mm6    \n\t"
-#if 1
             MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
             MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
             MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
             MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-#else
-            "movq                   %%mm5,  (%2, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm4,  (%3, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-#endif
             "add                       $8, %%"REG_a"            \n\t"
             "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
             "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
@@ -1584,12 +1575,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
                "g" (-mmxSize)
             : "%"REG_a
         );
-#else
-        const x86_reg mmxSize=1;
-
-        dst[0        ]= (3*src[0] +   src[srcStride])>>2;
-        dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
-#endif
 
         for (x=mmxSize-1; x<srcWidth-1; x++) {
             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
@@ -1605,7 +1590,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
     }
 
     // last line
-#if 1
     dst[0]= src[0];
 
     for (x=0; x<srcWidth-1; x++) {
@@ -1613,18 +1597,14 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
     }
     dst[2*srcWidth-1]= src[srcWidth-1];
-#else
-    for (x=0; x<srcWidth; x++) {
-        dst[2*x+0]=
-        dst[2*x+1]= src[x];
-    }
-#endif
 
     __asm__ volatile(EMMS"       \n\t"
                      SFENCE"     \n\t"
                      :::"memory");
 }
+#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
@@ -1728,6 +1708,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
                      SFENCE"     \n\t"
                      :::"memory");
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 2.
@@ -1978,7 +1959,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
 
      rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
 }
+#endif /* !COMPILE_TEMPLATE_SSE2 */
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
                                     long width, long height, long src1Stride,
                                     long src2Stride, long dstStride)
@@ -2048,7 +2031,10 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
             ::: "memory"
             );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
+#if !COMPILE_TEMPLATE_SSE2
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                        uint8_t *dst1, uint8_t *dst2,
                                        long width, long height,
@@ -2228,6 +2214,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
             ::: "memory"
         );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
 {
@@ -2266,6 +2253,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count
     }
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
     dst0+=   count;
@@ -2311,6 +2299,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds
         count++;
     }
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
@@ -2365,6 +2354,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u
     }
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
     dst0+=   count;
@@ -2411,6 +2401,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst
         count++;
     }
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
@@ -2492,6 +2483,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
         );
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  long width, long height,
                                  long lumStride, long chromStride, long srcStride)
@@ -2514,6 +2506,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
             ::: "memory"
         );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  long width, long height,
@@ -2540,6 +2533,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
         );
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  long width, long height,
                                  long lumStride, long chromStride, long srcStride)
@@ -2562,9 +2556,13 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
             ::: "memory"
         );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* !COMPILE_TEMPLATE_SSE2 */
 
 static inline void RENAME(rgb2rgb_init)(void)
 {
+#if !COMPILE_TEMPLATE_SSE2
+#if !COMPILE_TEMPLATE_AMD3DNOW
     rgb15to16          = RENAME(rgb15to16);
     rgb15tobgr24       = RENAME(rgb15tobgr24);
     rgb15to32          = RENAME(rgb15to32);
@@ -2588,14 +2586,22 @@ static inline void RENAME(rgb2rgb_init)(void)
     yuv422ptoyuy2      = RENAME(yuv422ptoyuy2);
     yuv422ptouyvy      = RENAME(yuv422ptouyvy);
     yuy2toyv12         = RENAME(yuy2toyv12);
-    planar2x           = RENAME(planar2x);
-    rgb24toyv12        = RENAME(rgb24toyv12);
-    interleaveBytes    = RENAME(interleaveBytes);
     vu9_to_vu12        = RENAME(vu9_to_vu12);
     yvu9_to_yuy2       = RENAME(yvu9_to_yuy2);
-
-    uyvytoyuv420       = RENAME(uyvytoyuv420);
     uyvytoyuv422       = RENAME(uyvytoyuv422);
-    yuyvtoyuv420       = RENAME(yuyvtoyuv420);
     yuyvtoyuv422       = RENAME(yuyvtoyuv422);
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+    planar2x           = RENAME(planar2x);
+#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
+    rgb24toyv12        = RENAME(rgb24toyv12);
+
+    yuyvtoyuv420       = RENAME(yuyvtoyuv420);
+    uyvytoyuv420       = RENAME(uyvytoyuv420);
+#endif /* !COMPILE_TEMPLATE_SSE2 */
+
+#if !COMPILE_TEMPLATE_AMD3DNOW
+    interleaveBytes    = RENAME(interleaveBytes);
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 }

From 69645c021ad2cc503769b44b9008f98b014423de Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 26 May 2011 09:15:38 -0400
Subject: [PATCH 319/830] swscale: replace formatConvBuffer[VOF] by allocated
 array.

This allows converting between formats of arbitrary width,
regardless of the value of VOF/VOFW.
---
 libswscale/swscale_internal.h | 2 +-
 libswscale/swscale_template.c | 5 +++--
 libswscale/utils.c            | 6 ++----
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 5f2ff94691..1e52ea2a95 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -122,7 +122,7 @@ typedef struct SwsContext {
     int       chrBufIndex;        ///< Index in ring buffer of the last scaled horizontal chroma     line from source.
     //@}
 
-    uint8_t formatConvBuffer[VOF]; //FIXME dynamic allocation, but we have to change a lot of code for this to be useful
+    uint8_t *formatConvBuffer;
 
     /**
      * @name Horizontal and vertical filters.
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index aeeb42815e..8784359dc2 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -459,9 +459,10 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
     src2 += c->chrSrcOffset;
 
     if (c->chrToYV12) {
-        c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
+        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
         src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
+        src2= buf2;
     }
 
     if (!c->hcscale_fast) {
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 96b3207cdd..20f07d672e 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -790,10 +790,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
                srcW, srcH, dstW, dstH);
         return AVERROR(EINVAL);
     }
-    if(srcW > VOFW || dstW > VOFW) {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
-        return AVERROR(EINVAL);
-    }
+    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2, fail);
 
     if (!dstFilter) dstFilter= &dummyFilter;
     if (!srcFilter) srcFilter= &dummyFilter;
@@ -1507,6 +1504,7 @@ void sws_freeContext(SwsContext *c)
 #endif /* HAVE_MMX */
 
     av_freep(&c->yuvTable);
+    av_free(c->formatConvBuffer);
 
     av_free(c);
 }

From b4a224c5e4109cb2cca8bac38628673d685fe763 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 25 May 2011 14:30:09 -0400
Subject: [PATCH 320/830] swscale: split chroma buffers into separate U/V
 planes.

Preparatory step to implement support for sizes > VOFW.
---
 libswscale/swscale.c              |  68 +++---
 libswscale/swscale_internal.h     |  27 ++-
 libswscale/swscale_template.c     | 130 +++++-----
 libswscale/utils.c                |  19 +-
 libswscale/x86/swscale_template.c | 380 +++++++++++++++++-------------
 5 files changed, 359 insertions(+), 265 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 36b676bf06..54a75971be 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -207,7 +207,8 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
 #endif
 
 static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                                    const int16_t **chrVSrc, int chrFilterSize,
                                                     const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest,
                                                     int dstW, int chrDstW, int big_endian, int output_bits)
 {
@@ -246,8 +247,8 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
             int j;
 
             for (j = 0; j < chrFilterSize; j++) {
-                u += chrSrc[j][i       ] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             output_pixel(&uDest[i], u);
@@ -271,13 +272,14 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
 #define yuv2NBPS(bits, BE_LE, is_be) \
 static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \
                               const int16_t **lumSrc, int lumFilterSize, \
-                              const int16_t *chrFilter, const int16_t **chrSrc, \
+                              const int16_t *chrFilter, const int16_t **chrUSrc, \
+                              const int16_t **chrVSrc, \
                               int chrFilterSize, const int16_t **alpSrc, \
                               uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \
                               uint16_t *aDest, int dstW, int chrDstW) \
 { \
     yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \
-                           chrFilter, chrSrc, chrFilterSize, \
+                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                            alpSrc, \
                            dest, uDest, vDest, aDest, \
                            dstW, chrDstW, is_be, bits); \
@@ -290,20 +292,20 @@ yuv2NBPS(16, BE, 1);
 yuv2NBPS(16, LE, 0);
 
 static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
                                  const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
                                  enum PixelFormat dstFormat)
 {
 #define conv16(bits) \
     if (isBE(dstFormat)) { \
         yuv2yuvX ## bits ## BE_c(lumFilter, lumSrc, lumFilterSize, \
-                               chrFilter, chrSrc, chrFilterSize, \
+                               chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                alpSrc, \
                                dest, uDest, vDest, aDest, \
                                dstW, chrDstW); \
     } else { \
         yuv2yuvX ## bits ## LE_c(lumFilter, lumSrc, lumFilterSize, \
-                               chrFilter, chrSrc, chrFilterSize, \
+                               chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                alpSrc, \
                                dest, uDest, vDest, aDest, \
                                dstW, chrDstW); \
@@ -319,7 +321,8 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr
 }
 
 static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                               const int16_t *chrFilter, const int16_t **chrUSrc,
+                               const int16_t **chrVSrc, int chrFilterSize,
                                const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
 {
     //FIXME Optimize (just quickly written not optimized..)
@@ -339,8 +342,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
             int v=1<<18;
             int j;
             for (j=0; j<chrFilterSize; j++) {
-                u += chrSrc[j][i] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             uDest[i]= av_clip_uint8(u>>19);
@@ -360,7 +363,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
 }
 
 static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc, int chrFilterSize,
                                 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
 {
     //FIXME Optimize (just quickly written not optimized..)
@@ -383,8 +387,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             int v=1<<18;
             int j;
             for (j=0; j<chrFilterSize; j++) {
-                u += chrSrc[j][i] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             uDest[2*i]= av_clip_uint8(u>>19);
@@ -396,8 +400,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             int v=1<<18;
             int j;
             for (j=0; j<chrFilterSize; j++) {
-                u += chrSrc[j][i] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             uDest[2*i]= av_clip_uint8(v>>19);
@@ -421,8 +425,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
         }\
         for (j=0; j<chrFilterSize; j++) {\
-            U += chrSrc[j][i] * chrFilter[j];\
-            V += chrSrc[j][i+VOFW] * chrFilter[j];\
+            U += chrUSrc[j][i] * chrFilter[j];\
+            V += chrVSrc[j][i] * chrFilter[j];\
         }\
         Y1>>=19;\
         Y2>>=19;\
@@ -469,8 +473,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             Y += lumSrc[j][i     ] * lumFilter[j];\
         }\
         for (j=0; j<chrFilterSize; j++) {\
-            U += chrSrc[j][i     ] * chrFilter[j];\
-            V += chrSrc[j][i+VOFW] * chrFilter[j];\
+            U += chrUSrc[j][i] * chrFilter[j];\
+            V += chrVSrc[j][i] * chrFilter[j];\
         }\
         Y >>=10;\
         U >>=10;\
@@ -535,8 +539,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         const int i2= 2*i;       \
         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
-        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
-        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
+        int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;              \
+        int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;              \
         type av_unused *r, *b, *g;                                    \
         int av_unused A1, A2;                                         \
         if (alpha) {\
@@ -561,8 +565,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         const int i2= 2*i;\
         int Y1= buf0[i2  ]>>7;\
         int Y2= buf0[i2+1]>>7;\
-        int U= (uvbuf1[i     ])>>7;\
-        int V= (uvbuf1[i+VOFW])>>7;\
+        int U= (ubuf1[i])>>7;\
+        int V= (vbuf1[i])>>7;\
         type av_unused *r, *b, *g;\
         int av_unused A1, A2;\
         if (alpha) {\
@@ -587,8 +591,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         const int i2= 2*i;\
         int Y1= buf0[i2  ]>>7;\
         int Y2= buf0[i2+1]>>7;\
-        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
-        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
+        int U= (ubuf0[i] + ubuf1[i])>>8;\
+        int V= (vbuf0[i] + vbuf1[i])>>8;\
         type av_unused *r, *b, *g;\
         int av_unused A1, A2;\
         if (alpha) {\
@@ -870,16 +874,20 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         break;\
     }
 
-static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                  const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter,
+                                  const int16_t **lumSrc, int lumFilterSize,
+                                  const int16_t *chrFilter, const int16_t **chrUSrc,
+                                  const int16_t **chrVSrc, int chrFilterSize,
                                   const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
 }
 
-static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc, int chrFilterSize,
                                     const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 1e52ea2a95..964f710179 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -112,7 +112,8 @@ typedef struct SwsContext {
      */
     //@{
     int16_t **lumPixBuf;          ///< Ring buffer for scaled horizontal luma   plane lines to be fed to the vertical scaler.
-    int16_t **chrPixBuf;          ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
+    int16_t **chrUPixBuf;         ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
+    int16_t **chrVPixBuf;         ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
     int16_t **alpPixBuf;          ///< Ring buffer for scaled horizontal alpha  plane lines to be fed to the vertical scaler.
     int       vLumBufSize;        ///< Number of vertical luma/alpha lines allocated in the ring buffer.
     int       vChrBufSize;        ///< Number of vertical chroma     lines allocated in the ring buffer.
@@ -200,6 +201,7 @@ typedef struct SwsContext {
 #define V_TEMP                "11*8+4*4*256*2+32"
 #define Y_TEMP                "11*8+4*4*256*2+40"
 #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
+#define UV_OFF                "11*8+4*4*256*3+48"
 
     DECLARE_ALIGNED(8, uint64_t, redDither);
     DECLARE_ALIGNED(8, uint64_t, greenDither);
@@ -222,6 +224,7 @@ typedef struct SwsContext {
     DECLARE_ALIGNED(8, uint64_t, v_temp);
     DECLARE_ALIGNED(8, uint64_t, y_temp);
     int32_t  alpMmxFilter[4*MAX_FILTER_SIZE];
+    DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
 
 #if HAVE_ALTIVEC
     vector signed short   CY;
@@ -255,36 +258,42 @@ typedef struct SwsContext {
     /* function pointers for swScale() */
     void (*yuv2nv12X  )(struct SwsContext *c,
                         const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                        const int16_t **chrVSrc, int chrFilterSize,
                         uint8_t *dest, uint8_t *uDest,
                         int dstW, int chrDstW, int dstFormat);
     void (*yuv2yuv1   )(struct SwsContext *c,
-                        const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
+                        const int16_t *lumSrc, const int16_t *chrUSrc,
+                        const int16_t *chrVSrc, const int16_t *alpSrc,
                         uint8_t *dest,
                         uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
                         long dstW, long chrDstW);
     void (*yuv2yuvX   )(struct SwsContext *c,
                         const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                        const int16_t **chrVSrc, int chrFilterSize,
                         const int16_t **alpSrc,
                         uint8_t *dest,
                         uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
                         long dstW, long chrDstW);
     void (*yuv2packed1)(struct SwsContext *c,
                         const uint16_t *buf0,
-                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                        const uint16_t *ubuf0, const uint16_t *ubuf1,
+                        const uint16_t *vbuf0, const uint16_t *vbuf1,
                         const uint16_t *abuf0,
                         uint8_t *dest,
                         int dstW, int uvalpha, int dstFormat, int flags, int y);
     void (*yuv2packed2)(struct SwsContext *c,
                         const uint16_t *buf0, const uint16_t *buf1,
-                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                        const uint16_t *ubuf0, const uint16_t *ubuf1,
+                        const uint16_t *vbuf0, const uint16_t *vbuf1,
                         const uint16_t *abuf0, const uint16_t *abuf1,
                         uint8_t *dest,
                         int dstW, int yalpha, int uvalpha, int y);
     void (*yuv2packedX)(struct SwsContext *c,
                         const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                        const int16_t **chrVSrc, int chrFilterSize,
                         const int16_t **alpSrc, uint8_t *dest,
                         long dstW, long dstY);
 
@@ -299,7 +308,7 @@ typedef struct SwsContext {
                          int16_t *dst, long dstWidth,
                          const uint8_t *src, int srcW, int xInc);
     void (*hcscale_fast)(struct SwsContext *c,
-                         int16_t *dst, long dstWidth,
+                         int16_t *dst1, int16_t *dst2, long dstWidth,
                          const uint8_t *src1, const uint8_t *src2,
                          int srcW, int xInc);
 
@@ -308,7 +317,7 @@ typedef struct SwsContext {
                    long filterSize);
 
     void (*lumConvertRange)(uint16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
-    void (*chrConvertRange)(uint16_t *dst, int width); ///< Color range conversion function for chroma planes if needed.
+    void (*chrConvertRange)(uint16_t *dst1, uint16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.
 
     int lumSrcOffset; ///< Offset given to luma src pointers passed to horizontal input functions.
     int chrSrcOffset; ///< Offset given to chroma src pointers passed to horizontal input functions.
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 8784359dc2..435f1a200f 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -20,29 +20,32 @@
 
 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                               const int16_t **lumSrc, int lumFilterSize,
-                              const int16_t *chrFilter, const int16_t **chrSrc,
+                              const int16_t *chrFilter, const int16_t **chrUSrc,
+                              const int16_t **chrVSrc,
                               int chrFilterSize, const int16_t **alpSrc,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                               uint8_t *aDest, long dstW, long chrDstW)
 {
     yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
-                chrFilter, chrSrc, chrFilterSize,
+                chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
 }
 
 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                                const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter, const int16_t **chrSrc,
+                               const int16_t *chrFilter, const int16_t **chrUSrc,
+                               const int16_t **chrVSrc,
                                int chrFilterSize, uint8_t *dest, uint8_t *uDest,
                                int dstW, int chrDstW, enum PixelFormat dstFormat)
 {
     yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
-                 chrFilter, chrSrc, chrFilterSize,
+                 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                  dest, uDest, dstW, chrDstW, dstFormat);
 }
 
 static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
-                              const int16_t *chrSrc, const int16_t *alpSrc,
+                              const int16_t *chrUSrc, const int16_t *chrVSrc,
+                              const int16_t *alpSrc,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                               uint8_t *aDest, long dstW, long chrDstW)
 {
@@ -60,8 +63,8 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
 
     if (uDest)
         for (i=0; i<chrDstW; i++) {
-            int u=(chrSrc[i       ]+64)>>7;
-            int v=(chrSrc[i + VOFW]+64)>>7;
+            int u=(chrUSrc[i]+64)>>7;
+            int v=(chrVSrc[i]+64)>>7;
 
             if ((u|v)&256) {
                 if (u<0)        u=0;
@@ -87,12 +90,13 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
  */
 static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                                  const int16_t **lumSrc, int lumFilterSize,
-                                 const int16_t *chrFilter, const int16_t **chrSrc,
+                                 const int16_t *chrFilter, const int16_t **chrUSrc,
+                                 const int16_t **chrVSrc,
                                  int chrFilterSize, const int16_t **alpSrc,
                                  uint8_t *dest, long dstW, long dstY)
 {
         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                       chrFilter, chrSrc, chrFilterSize,
+                       chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                        alpSrc, dest, dstW, dstY);
 }
 
@@ -100,8 +104,9 @@ static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
  * vertical bilinear scale YV12 to RGB
  */
 static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *buf1, const uint16_t *uvbuf0,
-                                 const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                 const uint16_t *buf1, const uint16_t *ubuf0,
+                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                 const uint16_t *vbuf1, const uint16_t *abuf0,
                                  const uint16_t *abuf1, uint8_t *dest, int dstW,
                                  int yalpha, int uvalpha, int y)
 {
@@ -116,7 +121,8 @@ static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
  * YV12 to RGB without scaling or interpolating
  */
 static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
                                  const uint16_t *abuf0, uint8_t *dest, int dstW,
                                  int uvalpha, enum PixelFormat dstFormat,
                                  int flags, int y)
@@ -359,20 +365,20 @@ static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
 
 //FIXME all pal and rgb srcFormats could do this convertion as well
 //FIXME all scalers more complex than bilinear could do half of this transform
-static void chrRangeToJpeg_c(uint16_t *dst, int width)
+static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
-        dst[i     ] = (FFMIN(dst[i     ],30775)*4663 - 9289992)>>12; //-264
-        dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
+        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
+        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
     }
 }
-static void chrRangeFromJpeg_c(uint16_t *dst, int width)
+static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
-        dst[i     ] = (dst[i     ]*1799 + 4081085)>>11; //1469
-        dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
+        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
+        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
     }
 }
 static void lumRangeToJpeg_c(uint16_t *dst, int width)
@@ -429,7 +435,7 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
         convertRange(dst, dstWidth);
 }
 
-static inline void hcscale_fast_c(SwsContext *c, int16_t *dst,
+static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                                   long dstWidth, const uint8_t *src1,
                                   const uint8_t *src2, int srcW, int xInc)
 {
@@ -438,17 +444,13 @@ static inline void hcscale_fast_c(SwsContext *c, int16_t *dst,
     for (i=0;i<dstWidth;i++) {
         register unsigned int xx=xpos>>16;
         register unsigned int xalpha=(xpos&0xFFFF)>>9;
-        dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
-        dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
-        /* slower
-        dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
-        dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
-        */
+        dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
+        dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
         xpos+=xInc;
     }
 }
 
-inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
+inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, long dstWidth,
                              const uint8_t *src1, const uint8_t *src2,
                              int srcW, int xInc, const int16_t *hChrFilter,
                              const int16_t *hChrFilterPos, int hChrFilterSize,
@@ -466,14 +468,14 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
     }
 
     if (!c->hcscale_fast) {
-        c->hScale(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-        c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
     } else { // fast bilinear upscale / crap downscale
-        c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
+        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
     }
 
     if (c->chrConvertRange)
-        c->chrConvertRange(dst, dstWidth);
+        c->chrConvertRange(dst1, dst2, dstWidth);
 }
 
 #define DEBUG_SWSCALE_BUFFERS 0
@@ -513,7 +515,8 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
     const int hLumFilterSize= c->hLumFilterSize;
     const int hChrFilterSize= c->hChrFilterSize;
     int16_t **lumPixBuf= c->lumPixBuf;
-    int16_t **chrPixBuf= c->chrPixBuf;
+    int16_t **chrUPixBuf= c->chrUPixBuf;
+    int16_t **chrVPixBuf= c->chrVPixBuf;
     int16_t **alpPixBuf= c->alpPixBuf;
     const int vLumBufSize= c->vLumBufSize;
     const int vChrBufSize= c->vChrBufSize;
@@ -641,10 +644,10 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
             //FIXME replace parameters through context struct (some at least)
 
             if (c->needs_hcscale)
-                hcscale_c(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
-                                hChrFilter, hChrFilterPos, hChrFilterSize,
-                                formatConvBuffer,
-                                pal);
+                hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
+                          chrDstW, src1, src2, chrSrcW, chrXInc,
+                          hChrFilter, hChrFilterPos, hChrFilterSize,
+                          formatConvBuffer, pal);
             lastInChrBuf++;
             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                                chrBufIndex,    lastInChrBuf);
@@ -660,47 +663,54 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
 #endif
         if (dstY < dstH-2) {
             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
             if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
                 c->yuv2nv12X(c,
                              vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                              dest, uDest, dstW, chrDstW, dstFormat);
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16inC(
-                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                    yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
+                                  chrVSrcPtr, vChrFilterSize,
+                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
+                                  (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
                                   dstFormat);
                 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                     const int16_t *lumBuf = lumSrcPtr[0];
-                    const int16_t *chrBuf= chrSrcPtr[0];
+                    const int16_t *chrUBuf= chrUSrcPtr[0];
+                    const int16_t *chrVBuf= chrVSrcPtr[0];
                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
-                    c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
+                    c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
+                                uDest, vDest, aDest, dstW, chrDstW);
                 } else { //General YV12
                     c->yuv2yuvX(c,
                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
+                                chrVSrcPtr, vChrFilterSize,
                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                 }
             } else {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
+                assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
+                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                     int chrAlpha= vChrFilter[2*dstY+1];
                     if(flags & SWS_FULL_CHR_H_INT) {
                         yuv2rgbXinC_full(c, //FIXME write a packed1_full function
                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
+                                         chrVSrcPtr, vChrFilterSize,
                                          alpSrcPtr, dest, dstW, dstY);
                     } else {
-                        c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
+                        c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
+                                       *chrVSrcPtr, *(chrVSrcPtr+1),
                                        alpPixBuf ? *alpSrcPtr : NULL,
                                        dest, dstW, chrAlpha, dstFormat, flags, dstY);
                     }
@@ -714,10 +724,11 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                     if(flags & SWS_FULL_CHR_H_INT) {
                         yuv2rgbXinC_full(c, //FIXME write a packed2_full function
                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                          alpSrcPtr, dest, dstW, dstY);
                     } else {
-                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
+                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
+                                       *chrVSrcPtr, *(chrVSrcPtr+1),
                                        alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                        dest, dstW, lumAlpha, chrAlpha, dstY);
                     }
@@ -725,26 +736,27 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                     if(flags & SWS_FULL_CHR_H_INT) {
                         yuv2rgbXinC_full(c,
                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                          alpSrcPtr, dest, dstW, dstY);
                     } else {
                         c->yuv2packedX(c,
                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                       vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                       vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                        alpSrcPtr, dest, dstW, dstY);
                     }
                 }
             }
         } else { // hmm looks like we can't use MMX here without overwriting this array's tail
             const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
             if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
                 yuv2nv12XinC(
                              vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                              dest, uDest, dstW, chrDstW, dstFormat);
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
@@ -752,27 +764,27 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
                     yuv2yuvX16inC(
                                   vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                   alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
                                   dstFormat);
                 } else {
                     yuv2yuvXinC(
                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                 }
             } else {
                 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
+                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                 if(flags & SWS_FULL_CHR_H_INT) {
                     yuv2rgbXinC_full(c,
                                      vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                     vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                     vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                      alpSrcPtr, dest, dstW, dstY);
                 } else {
                     yuv2packedXinC(c,
                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                   vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                   vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                    alpSrcPtr, dest, dstW, dstY);
                 }
             }
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 20f07d672e..cb174e5b8e 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -989,7 +989,8 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     // allocate pixbufs (we use dynamic allocation because otherwise we would need to
     // allocate several megabytes to handle all possible cases)
     FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail);
-    FF_ALLOC_OR_GOTO(c, c->chrPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail);
+    FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail);
+    FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail);
     if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat))
         FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail);
     //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
@@ -998,9 +999,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], VOF+1, fail);
         c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
     }
+    c->uv_off = VOFW;
     for (i=0; i<c->vChrBufSize; i++) {
-        FF_ALLOC_OR_GOTO(c, c->chrPixBuf[i+c->vChrBufSize], (VOF+1)*2, fail);
-        c->chrPixBuf[i] = c->chrPixBuf[i+c->vChrBufSize];
+        FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], VOF*2+1, fail);
+        c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize];
+        c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] + VOFW;
     }
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
         for (i=0; i<c->vLumBufSize; i++) {
@@ -1009,7 +1012,8 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         }
 
     //try to avoid drawing green stuff between the right end and the stride end
-    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);
+    for (i=0; i<c->vChrBufSize; i++)
+        memset(c->chrUPixBuf[i], 64, VOF*2+1);
 
     assert(2*VOFW == VOF);
 
@@ -1462,10 +1466,11 @@ void sws_freeContext(SwsContext *c)
         av_freep(&c->lumPixBuf);
     }
 
-    if (c->chrPixBuf) {
+    if (c->chrUPixBuf) {
         for (i=0; i<c->vChrBufSize; i++)
-            av_freep(&c->chrPixBuf[i]);
-        av_freep(&c->chrPixBuf);
+            av_freep(&c->chrUPixBuf[i]);
+        av_freep(&c->chrUPixBuf);
+        av_freep(&c->chrVPixBuf);
     }
 
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 06cc3eef5c..0de7e94c97 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -37,9 +37,8 @@
 #endif
 #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
 
-#define YSCALEYUV2YV12X(x, offset, dest, width) \
+#define YSCALEYUV2YV12X(offset, dest, end, pos) \
     __asm__ volatile(\
-        "xor                          %%"REG_a", %%"REG_a"  \n\t"\
         "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
         "movq                             %%mm3, %%mm4      \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
@@ -47,8 +46,8 @@
         ".p2align                             4             \n\t" /* FIXME Unroll? */\
         "1:                                                 \n\t"\
         "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
-        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
-        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm5      \n\t" /* srcData */\
+        "movq                (%%"REG_S", %3, 2), %%mm2      \n\t" /* srcData */\
+        "movq               8(%%"REG_S", %3, 2), %%mm5      \n\t" /* srcData */\
         "add                                $16, %%"REG_d"  \n\t"\
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         "test                         %%"REG_S", %%"REG_S"  \n\t"\
@@ -61,40 +60,40 @@
         "psraw                               $3, %%mm4      \n\t"\
         "packuswb                         %%mm4, %%mm3      \n\t"\
         MOVNTQ(%%mm3, (%1, %%REGa))\
-        "add                                 $8, %%"REG_a"  \n\t"\
-        "cmp                                 %2, %%"REG_a"  \n\t"\
+        "add                                 $8, %3         \n\t"\
+        "cmp                                 %2, %3         \n\t"\
         "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
         "movq                             %%mm3, %%mm4      \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         "jb                                  1b             \n\t"\
         :: "r" (&c->redDither),\
-        "r" (dest), "g" ((x86_reg)width)\
-        : "%"REG_a, "%"REG_d, "%"REG_S\
+           "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
+        : "%"REG_d, "%"REG_S\
     );
 
 static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                     const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc,
                                     int chrFilterSize, const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                                     uint8_t *aDest, long dstW, long chrDstW)
 {
     if (uDest) {
-        YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-        YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
+        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off)
     }
     if (CONFIG_SWSCALE_ALPHA && aDest) {
-        YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
+        YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
     }
 
-    YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+    YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
 }
 
-#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
+#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
     __asm__ volatile(\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-        "xor                          %%"REG_a", %%"REG_a"  \n\t"\
         "pxor                             %%mm4, %%mm4      \n\t"\
         "pxor                             %%mm5, %%mm5      \n\t"\
         "pxor                             %%mm6, %%mm6      \n\t"\
@@ -102,10 +101,10 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         ".p2align                             4             \n\t"\
         "1:                                                 \n\t"\
-        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
-        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
+        "movq                (%%"REG_S", %3, 2), %%mm0      \n\t" /* srcData */\
+        "movq               8(%%"REG_S", %3, 2), %%mm2      \n\t" /* srcData */\
         "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t"\
-        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
+        "movq                (%%"REG_S", %3, 2), %%mm1      \n\t" /* srcData */\
         "movq                             %%mm0, %%mm3      \n\t"\
         "punpcklwd                        %%mm1, %%mm0      \n\t"\
         "punpckhwd                        %%mm1, %%mm3      \n\t"\
@@ -114,7 +113,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "pmaddwd                          %%mm1, %%mm3      \n\t"\
         "paddd                            %%mm0, %%mm4      \n\t"\
         "paddd                            %%mm3, %%mm5      \n\t"\
-        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
+        "movq               8(%%"REG_S", %3, 2), %%mm3      \n\t" /* srcData */\
         "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t"\
         "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
         "test                         %%"REG_S", %%"REG_S"  \n\t"\
@@ -139,8 +138,8 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "psraw                               $3, %%mm6      \n\t"\
         "packuswb                         %%mm6, %%mm4      \n\t"\
         MOVNTQ(%%mm4, (%1, %%REGa))\
-        "add                                 $8, %%"REG_a"  \n\t"\
-        "cmp                                 %2, %%"REG_a"  \n\t"\
+        "add                                 $8, %3         \n\t"\
+        "cmp                                 %2, %3         \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
         "pxor                             %%mm4, %%mm4      \n\t"\
         "pxor                             %%mm5, %%mm5      \n\t"\
@@ -149,26 +148,27 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         "jb                                  1b             \n\t"\
         :: "r" (&c->redDither),\
-        "r" (dest), "g" ((x86_reg)width)\
+        "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
         : "%"REG_a, "%"REG_d, "%"REG_S\
     );
 
 static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                                       const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
                                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                                        uint8_t *aDest, long dstW, long chrDstW)
 {
     if (uDest) {
-        YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-        YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
+        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off)
     }
     if (CONFIG_SWSCALE_ALPHA && aDest) {
-        YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
+        YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
     }
 
-    YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+    YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
 }
 
 #define YSCALEYUV2YV121 \
@@ -185,12 +185,13 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
     "jnc                   1b             \n\t"
 
 static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
-                                    const int16_t *chrSrc, const int16_t *alpSrc,
+                                    const int16_t *chrUSrc, const int16_t *chrVSrc,
+                                    const int16_t *alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                                     uint8_t *aDest, long dstW, long chrDstW)
 {
     long p= 4;
-    const uint8_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW };
+    const uint8_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
 
@@ -225,12 +226,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
     "jnc                   1b             \n\t"
 
 static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
-                                       const int16_t *chrSrc, const int16_t *alpSrc,
+                                       const int16_t *chrUSrc, const int16_t *chrVSrc,
+                                       const int16_t *alpSrc,
                                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                                        uint8_t *aDest, long dstW, long chrDstW)
 {
     long p= 4;
-    const uint8_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW };
+    const uint8_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
 
@@ -260,7 +262,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
         "2:                                             \n\t"\
         "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
         "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
-        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
+        "add                          %6, %%"REG_S"     \n\t" \
+        "movq     (%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
         "add                         $16, %%"REG_d"     \n\t"\
         "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
         "pmulhw                    %%mm0, %%mm2         \n\t"\
@@ -296,7 +299,7 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
 #define YSCALEYUV2PACKEDX_END                     \
         :: "r" (&c->redDither),                   \
             "m" (dummy), "m" (dummy), "m" (dummy),\
-            "r" (dest), "m" (dstW_reg)            \
+            "r" (dest), "m" (dstW_reg), "m"(uv_off) \
         : "%"REG_a, "%"REG_d, "%"REG_S            \
     );
 
@@ -315,7 +318,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
         ".p2align                      4                \n\t"\
         "2:                                             \n\t"\
         "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
-        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
+        "add                          %6, %%"REG_S"      \n\t" \
+        "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
         "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
         "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
         "movq                      %%mm0, %%mm3         \n\t"\
@@ -326,7 +330,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
         "pmaddwd                   %%mm1, %%mm3         \n\t"\
         "paddd                     %%mm0, %%mm4         \n\t"\
         "paddd                     %%mm3, %%mm5         \n\t"\
-        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
+        "add                          %6, %%"REG_S"      \n\t" \
+        "movq     (%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
         "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
         "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
         "test                  %%"REG_S", %%"REG_S"     \n\t"\
@@ -461,12 +466,14 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
 
 static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                           const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc,
+                                          const int16_t *chrFilter, const int16_t **chrUSrc,
+                                          const int16_t **chrVSrc,
                                           int chrFilterSize, const int16_t **alpSrc,
                                           uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
         YSCALEYUV2PACKEDX_ACCURATE
@@ -492,12 +499,14 @@ static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilte
 
 static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                                       const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
                                        uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
         YSCALEYUV2PACKEDX
@@ -547,12 +556,14 @@ static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
 
 static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                            const int16_t **lumSrc, int lumFilterSize,
-                                           const int16_t *chrFilter, const int16_t **chrSrc,
+                                           const int16_t *chrFilter, const int16_t **chrUSrc,
+                                           const int16_t **chrVSrc,
                                            int chrFilterSize, const int16_t **alpSrc,
                                            uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -569,12 +580,14 @@ static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilt
 
 static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
                                         const int16_t **lumSrc, int lumFilterSize,
-                                        const int16_t *chrFilter, const int16_t **chrSrc,
+                                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                                        const int16_t **chrVSrc,
                                         int chrFilterSize, const int16_t **alpSrc,
                                         uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -620,12 +633,14 @@ static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
 
 static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                            const int16_t **lumSrc, int lumFilterSize,
-                                           const int16_t *chrFilter, const int16_t **chrSrc,
+                                           const int16_t *chrFilter, const int16_t **chrUSrc,
+                                           const int16_t **chrVSrc,
                                            int chrFilterSize, const int16_t **alpSrc,
                                            uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -642,12 +657,14 @@ static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilt
 
 static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
                                         const int16_t **lumSrc, int lumFilterSize,
-                                        const int16_t *chrFilter, const int16_t **chrSrc,
+                                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                                        const int16_t **chrVSrc,
                                         int chrFilterSize, const int16_t **alpSrc,
                                         uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -773,12 +790,14 @@ static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
 
 static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                           const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc,
+                                          const int16_t *chrFilter, const int16_t **chrUSrc,
+                                          const int16_t **chrVSrc,
                                           int chrFilterSize, const int16_t **alpSrc,
                                           uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -788,19 +807,21 @@ static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilte
     WRITEBGR24(%%REGc, %5, %%REGa)
     :: "r" (&c->redDither),
        "m" (dummy), "m" (dummy), "m" (dummy),
-       "r" (dest), "m" (dstW_reg)
+       "r" (dest), "m" (dstW_reg), "m"(uv_off)
     : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
     );
 }
 
 static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                                       const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
                                        uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -810,7 +831,7 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
     WRITEBGR24(%%REGc, %5, %%REGa)
     :: "r" (&c->redDither),
        "m" (dummy), "m" (dummy), "m" (dummy),
-       "r" (dest),  "m" (dstW_reg)
+       "r" (dest),  "m" (dstW_reg), "m"(uv_off)
     : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
     );
 }
@@ -834,12 +855,14 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
 
 static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                             const int16_t **lumSrc, int lumFilterSize,
-                                            const int16_t *chrFilter, const int16_t **chrSrc,
+                                            const int16_t *chrFilter, const int16_t **chrUSrc,
+                                            const int16_t **chrVSrc,
                                             int chrFilterSize, const int16_t **alpSrc,
                                             uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX_ACCURATE
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -853,12 +876,14 @@ static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFil
 
 static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
                                          const int16_t **lumSrc, int lumFilterSize,
-                                         const int16_t *chrFilter, const int16_t **chrSrc,
+                                         const int16_t *chrFilter, const int16_t **chrUSrc,
+                                         const int16_t **chrVSrc,
                                          int chrFilterSize, const int16_t **alpSrc,
                                          uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -870,14 +895,16 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
     YSCALEYUV2PACKEDX_END
 }
 
-#define REAL_YSCALEYUV2RGB_UV(index, c) \
+#define REAL_YSCALEYUV2RGB_UV(index, c, uv_off) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
     "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@@ -940,8 +967,8 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
 
 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 
-#define YSCALEYUV2RGB(index, c) \
-    REAL_YSCALEYUV2RGB_UV(index, c) \
+#define YSCALEYUV2RGB(index, c, uv_off) \
+    REAL_YSCALEYUV2RGB_UV(index, c, uv_off) \
     REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
     REAL_YSCALEYUV2RGB_COEFF(c)
 
@@ -949,23 +976,26 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
  * vertical bilinear scale YV12 to RGB
  */
 static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *buf1, const uint16_t *uvbuf0,
-                                       const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                       const uint16_t *buf1, const uint16_t *ubuf0,
+                                       const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                       const uint16_t *vbuf1, const uint16_t *abuf0,
                                        const uint16_t *abuf1, uint8_t *dest,
                                        int dstW, int yalpha, int uvalpha, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
+
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
         __asm__ volatile(
-            YSCALEYUV2RGB(%%r8, %5)
+            YSCALEYUV2RGB(%%r8, %5, %8)
             YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
             "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
             "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
             "packuswb            %%mm7, %%mm1       \n\t"
             WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
                "a" (&c->redDither),
-               "r" (abuf0), "r" (abuf1)
+               "r" (abuf0), "r" (abuf1), "m"(uv_off)
             : "%r8"
         );
 #else
@@ -975,7 +1005,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5)
+            YSCALEYUV2RGB(%%REGBP, %5, %6)
             "push                   %0              \n\t"
             "push                   %1              \n\t"
             "mov          "U_TEMP"(%5), %0          \n\t"
@@ -990,7 +1020,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+               "a" (&c->redDither), "m"(uv_off)
         );
 #endif
     } else {
@@ -998,50 +1028,56 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5)
+            YSCALEYUV2RGB(%%REGBP, %5, %6)
             "pcmpeqd %%mm7, %%mm7                   \n\t"
             WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     }
 }
 
 static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *buf1, const uint16_t *uvbuf0,
-                                       const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                       const uint16_t *buf1, const uint16_t *ubuf0,
+                                       const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                       const uint16_t *vbuf1, const uint16_t *abuf0,
                                        const uint16_t *abuf1, uint8_t *dest,
                                        int dstW, int yalpha, int uvalpha, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        YSCALEYUV2RGB(%%REGBP, %5, %6)
         "pxor    %%mm7, %%mm7                   \n\t"
         WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-           "a" (&c->redDither)
+        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+           "a" (&c->redDither), "m"(uv_off)
     );
 }
 
 static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *buf1, const uint16_t *uvbuf0,
-                                        const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                        const uint16_t *buf1, const uint16_t *ubuf0,
+                                        const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                        const uint16_t *vbuf1, const uint16_t *abuf0,
                                         const uint16_t *abuf1, uint8_t *dest,
                                         int dstW, int yalpha, int uvalpha, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        YSCALEYUV2RGB(%%REGBP, %5, %6)
         "pxor    %%mm7, %%mm7                   \n\t"
         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1052,23 +1088,26 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
         WRITERGB15(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-           "a" (&c->redDither)
+        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+           "a" (&c->redDither), "m"(uv_off)
     );
 }
 
 static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *buf1, const uint16_t *uvbuf0,
-                                        const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                        const uint16_t *buf1, const uint16_t *ubuf0,
+                                        const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                        const uint16_t *vbuf1, const uint16_t *abuf0,
                                         const uint16_t *abuf1, uint8_t *dest,
                                         int dstW, int yalpha, int uvalpha, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        YSCALEYUV2RGB(%%REGBP, %5, %6)
         "pxor    %%mm7, %%mm7                   \n\t"
         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1079,12 +1118,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
         WRITERGB16(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-           "a" (&c->redDither)
+        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+           "a" (&c->redDither), "m"(uv_off)
     );
 }
 
-#define REAL_YSCALEYUV2PACKED(index, c) \
+#define REAL_YSCALEYUV2PACKED(index, c, uv_off) \
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
     "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
     "psraw                $3, %%mm0                           \n\t"\
@@ -1096,8 +1135,10 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
     "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@@ -1120,34 +1161,39 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
     "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
     "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
 
-#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
+#define YSCALEYUV2PACKED(index, c, uv_off)  REAL_YSCALEYUV2PACKED(index, c, uv_off)
 
 static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
-                                         const uint16_t *buf1, const uint16_t *uvbuf0,
-                                         const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                         const uint16_t *buf1, const uint16_t *ubuf0,
+                                         const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                         const uint16_t *vbuf1, const uint16_t *abuf0,
                                          const uint16_t *abuf1, uint8_t *dest,
                                          int dstW, int yalpha, int uvalpha, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov %4, %%"REG_b"                        \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2PACKED(%%REGBP, %5)
+        YSCALEYUV2PACKED(%%REGBP, %5, %6)
         WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-           "a" (&c->redDither)
+        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+           "a" (&c->redDither), "m"(uv_off)
     );
 }
 
-#define REAL_YSCALEYUV2RGB1(index, c) \
+#define REAL_YSCALEYUV2RGB1(index, c, uv_off) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
     "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
     "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
@@ -1189,17 +1235,19 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
     "packuswb          %%mm6, %%mm5     \n\t"\
     "packuswb          %%mm3, %%mm4     \n\t"\
 
-#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
+#define YSCALEYUV2RGB1(index, c, uv_off)  REAL_YSCALEYUV2RGB1(index, c, uv_off)
 
 // do vertical chrominance interpolation
-#define REAL_YSCALEYUV2RGB1b(index, c) \
+#define REAL_YSCALEYUV2RGB1b(index, c, uv_off) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
     "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
     "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
@@ -1243,7 +1291,7 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
     "packuswb          %%mm6, %%mm5     \n\t"\
     "packuswb          %%mm3, %%mm4     \n\t"\
 
-#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
+#define YSCALEYUV2RGB1b(index, c, uv_off)  REAL_YSCALEYUV2RGB1b(index, c, uv_off)
 
 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
     "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
@@ -1257,11 +1305,13 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
  * YV12 to RGB without scaling or interpolating
  */
 static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                       const uint16_t *vbuf0, const uint16_t *vbuf1,
                                        const uint16_t *abuf0, uint8_t *dest,
                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
                                        int flags, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1270,26 +1320,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5)
+                YSCALEYUV2RGB1(%%REGBP, %5, %6)
                 YSCALEYUV2RGB1_ALPHA(%%REGBP)
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                   "a" (&c->redDither)
+                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+                   "a" (&c->redDither), "m"(uv_off)
             );
         } else {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5)
+                YSCALEYUV2RGB1(%%REGBP, %5, %6)
                 "pcmpeqd %%mm7, %%mm7                   \n\t"
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                   "a" (&c->redDither)
+                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+                   "a" (&c->redDither), "m"(uv_off)
             );
         }
     } else {
@@ -1298,37 +1348,39 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5)
+                YSCALEYUV2RGB1b(%%REGBP, %5, %6)
                 YSCALEYUV2RGB1_ALPHA(%%REGBP)
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                   "a" (&c->redDither)
+                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+                   "a" (&c->redDither), "m"(uv_off)
             );
         } else {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5)
+                YSCALEYUV2RGB1b(%%REGBP, %5, %6)
                 "pcmpeqd %%mm7, %%mm7                   \n\t"
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                   "a" (&c->redDither)
+                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+                   "a" (&c->redDither), "m"(uv_off)
             );
         }
     }
 }
 
 static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                       const uint16_t *vbuf0, const uint16_t *vbuf1,
                                        const uint16_t *abuf0, uint8_t *dest,
                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
                                        int flags, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1336,36 +1388,38 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5)
+            YSCALEYUV2RGB1(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5)
+            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     }
 }
 
 static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                        const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                        const uint16_t *vbuf0, const uint16_t *vbuf1,
                                         const uint16_t *abuf0, uint8_t *dest,
                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
                                         int flags, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1373,7 +1427,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5)
+            YSCALEYUV2RGB1(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1384,15 +1438,15 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             WRITERGB15(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5)
+            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1403,18 +1457,20 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             WRITERGB15(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     }
 }
 
 static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                        const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                        const uint16_t *vbuf0, const uint16_t *vbuf1,
                                         const uint16_t *abuf0, uint8_t *dest,
                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
                                         int flags, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1422,7 +1478,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5)
+            YSCALEYUV2RGB1(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1433,15 +1489,15 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             WRITERGB16(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5)
+            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1452,18 +1508,20 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             WRITERGB16(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     }
 }
 
-#define REAL_YSCALEYUV2PACKED1(index, c) \
+#define REAL_YSCALEYUV2PACKED1(index, c, uv_off) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "psraw                $7, %%mm3     \n\t" \
     "psraw                $7, %%mm4     \n\t" \
     "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
@@ -1471,16 +1529,18 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "psraw                $7, %%mm1     \n\t" \
     "psraw                $7, %%mm7     \n\t" \
 
-#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
+#define YSCALEYUV2PACKED1(index, c, uv_off)  REAL_YSCALEYUV2PACKED1(index, c, uv_off)
 
-#define REAL_YSCALEYUV2PACKED1b(index, c) \
+#define REAL_YSCALEYUV2PACKED1b(index, c, uv_off) \
     "xor "#index", "#index"             \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
     "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
     "psrlw                $8, %%mm3     \n\t" \
@@ -1489,14 +1549,16 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
     "psraw                $7, %%mm1     \n\t" \
     "psraw                $7, %%mm7     \n\t"
-#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
+#define YSCALEYUV2PACKED1b(index, c, uv_off)  REAL_YSCALEYUV2PACKED1b(index, c, uv_off)
 
 static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
-                                         const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                         const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                         const uint16_t *vbuf0, const uint16_t *vbuf1,
                                          const uint16_t *abuf0, uint8_t *dest,
                                          int dstW, int uvalpha, enum PixelFormat dstFormat,
                                          int flags, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1504,24 +1566,24 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1(%%REGBP, %5)
+            YSCALEYUV2PACKED1(%%REGBP, %5, %6)
             WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1b(%%REGBP, %5)
+            YSCALEYUV2PACKED1b(%%REGBP, %5, %6)
             WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     }
 }
@@ -2074,7 +2136,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
         dst[i] = src[srcW-1]*128;
 }
 
-static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
+static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
                                         long dstWidth, const uint8_t *src1,
                                         const uint8_t *src2, int srcW, int xInc)
 {
@@ -2089,7 +2151,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
 
     __asm__ volatile(
 #if defined(PIC)
-        "mov          %%"REG_b", %6         \n\t"
+        "mov          %%"REG_b", %7         \n\t"
 #endif
         "pxor             %%mm7, %%mm7      \n\t"
         "mov                 %0, %%"REG_c"  \n\t"
@@ -2107,8 +2169,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
         CALL_MMX2_FILTER_CODE
         "xor          %%"REG_a", %%"REG_a"  \n\t" // i
         "mov                 %5, %%"REG_c"  \n\t" // src
-        "mov                 %1, %%"REG_D"  \n\t" // buf1
-        "add              $"AV_STRINGIFY(VOF)", %%"REG_D"  \n\t"
+        "mov                 %6, %%"REG_D"  \n\t" // buf2
         PREFETCH"   (%%"REG_c")             \n\t"
         PREFETCH" 32(%%"REG_c")             \n\t"
         PREFETCH" 64(%%"REG_c")             \n\t"
@@ -2119,10 +2180,10 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
         CALL_MMX2_FILTER_CODE
 
 #if defined(PIC)
-        "mov %6, %%"REG_b"    \n\t"
+        "mov %7, %%"REG_b"    \n\t"
 #endif
-        :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
-           "m" (mmx2FilterCode), "m" (src2)
+        :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
+           "m" (mmx2FilterCode), "m" (src2), "m"(dst2)
 #if defined(PIC)
           ,"m" (ebxsave)
 #endif
@@ -2133,8 +2194,8 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
     );
 
     for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
-        dst[i] = src1[srcW-1]*128;
-        dst[i+VOFW] = src2[srcW-1]*128;
+        dst1[i] = src1[srcW-1]*128;
+        dst2[i] = src2[srcW-1]*128;
     }
 }
 #endif /* COMPILE_TEMPLATE_MMX2 */
@@ -2146,7 +2207,8 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
     const int dstH= c->dstH;
     const int flags= c->flags;
     int16_t **lumPixBuf= c->lumPixBuf;
-    int16_t **chrPixBuf= c->chrPixBuf;
+    int16_t **chrUPixBuf= c->chrUPixBuf;
+    int16_t **chrVPixBuf= c->chrVPixBuf;
     int16_t **alpPixBuf= c->alpPixBuf;
     const int vLumBufSize= c->vLumBufSize;
     const int vChrBufSize= c->vChrBufSize;
@@ -2171,7 +2233,8 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
     c->redDither= ff_dither8[(dstY+1)&1];
     if (dstY < dstH - 2) {
         const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-        const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+        const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+        const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
         const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
         int i;
         if (flags & SWS_ACCURATE_RND) {
@@ -2190,29 +2253,26 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
                 }
             }
             for (i=0; i<vChrFilterSize; i+=2) {
-                *(const void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
-                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
+                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i  ];
+                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
                 chrMmxFilter[s*i+APCK_COEF/4  ]=
                 chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
                            + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
             }
         } else {
             for (i=0; i<vLumFilterSize; i++) {
-                lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
-                lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
+                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
                 lumMmxFilter[4*i+2]=
                 lumMmxFilter[4*i+3]=
                     ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
                 if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                    alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
-                    alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
+                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
                     alpMmxFilter[4*i+2]=
                     alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
                 }
             }
             for (i=0; i<vChrFilterSize; i++) {
-                chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
-                chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
+                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
                 chrMmxFilter[4*i+2]=
                 chrMmxFilter[4*i+3]=
                     ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;

From 0f4eb8b04341081591bf401eaa2c07d6bc3ff52e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 26 May 2011 09:17:52 -0400
Subject: [PATCH 321/830] swscale: remove VOF/VOFW.

---
 libswscale/swscale_internal.h |  8 --------
 libswscale/utils.c            | 15 +++++++--------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 964f710179..5aad9cfdca 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -35,14 +35,6 @@
 
 #define MAX_FILTER_SIZE 256
 
-#if ARCH_X86
-#define VOFW 5120
-#else
-#define VOFW 2048 // faster on PPC and not tested on others
-#endif
-
-#define VOF  (VOFW*2)
-
 #if HAVE_BIGENDIAN
 #define ALT32_CORR (-1)
 #else
diff --git a/libswscale/utils.c b/libswscale/utils.c
index cb174e5b8e..15ed9d1ce6 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -749,6 +749,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     int srcH= c->srcH;
     int dstW= c->dstW;
     int dstH= c->dstH;
+    int dst_stride = FFALIGN(dstW * sizeof(int16_t), 16), dst_stride_px = dst_stride >> 1;
     int flags, cpu_flags;
     enum PixelFormat srcFormat= c->srcFormat;
     enum PixelFormat dstFormat= c->dstFormat;
@@ -996,26 +997,24 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
     /* align at 16 bytes for AltiVec */
     for (i=0; i<c->vLumBufSize; i++) {
-        FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], VOF+1, fail);
+        FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+1, fail);
         c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
     }
-    c->uv_off = VOFW;
+    c->uv_off = dst_stride_px;
     for (i=0; i<c->vChrBufSize; i++) {
-        FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], VOF*2+1, fail);
+        FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+1, fail);
         c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize];
-        c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] + VOFW;
+        c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] + dst_stride_px;
     }
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
         for (i=0; i<c->vLumBufSize; i++) {
-            FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i+c->vLumBufSize], VOF+1, fail);
+            FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i+c->vLumBufSize], dst_stride+1, fail);
             c->alpPixBuf[i] = c->alpPixBuf[i+c->vLumBufSize];
         }
 
     //try to avoid drawing green stuff between the right end and the stride end
     for (i=0; i<c->vChrBufSize; i++)
-        memset(c->chrUPixBuf[i], 64, VOF*2+1);
-
-    assert(2*VOFW == VOF);
+        memset(c->chrUPixBuf[i], 64, dst_stride*2+1);
 
     assert(c->chrDstH <= dstH);
 

From 48520772d9eba9725382be612f1dcd87d2fda2e6 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 25 May 2011 15:28:12 -0400
Subject: [PATCH 322/830] swscale: use av_clip_uint8() in yuv2yuv1_c().

---
 libswscale/swscale_template.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 435f1a200f..29836f123a 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -52,29 +52,15 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
     int i;
     for (i=0; i<dstW; i++) {
         int val= (lumSrc[i]+64)>>7;
-
-        if (val&256) {
-            if (val<0) val=0;
-            else       val=255;
-        }
-
-        dest[i]= val;
+        dest[i]= av_clip_uint8(val);
     }
 
     if (uDest)
         for (i=0; i<chrDstW; i++) {
             int u=(chrUSrc[i]+64)>>7;
             int v=(chrVSrc[i]+64)>>7;
-
-            if ((u|v)&256) {
-                if (u<0)        u=0;
-                else if (u>255) u=255;
-                if (v<0)        v=0;
-                else if (v>255) v=255;
-            }
-
-            uDest[i]= u;
-            vDest[i]= v;
+            uDest[i]= av_clip_uint8(u);
+            vDest[i]= av_clip_uint8(v);
         }
 
     if (CONFIG_SWSCALE_ALPHA && aDest)

From e16942852979c44faaa8fcd6ad95d1ff3642368b Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 26 May 2011 13:37:13 +0200
Subject: [PATCH 323/830] Mark parameterless function declarations as 'void'.

---
 libavcodec/bink.c     | 2 +-
 libavformat/network.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/bink.c b/libavcodec/bink.c
index 34d4d10fae..e085aa54e2 100644
--- a/libavcodec/bink.c
+++ b/libavcodec/bink.c
@@ -1208,7 +1208,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
 /**
  * Caclulate quantization tables for version b
  */
-static av_cold void binkb_calc_quant()
+static av_cold void binkb_calc_quant(void)
 {
     uint8_t inv_bink_scan[64];
     double s[64];
diff --git a/libavformat/network.h b/libavformat/network.h
index 881384c943..db8466ce20 100644
--- a/libavformat/network.h
+++ b/libavformat/network.h
@@ -33,7 +33,8 @@
 #define ECONNREFUSED    WSAECONNREFUSED
 #define EINPROGRESS     WSAEINPROGRESS
 
-static inline int ff_neterrno() {
+static inline int ff_neterrno(void)
+{
     int err = WSAGetLastError();
     switch (err) {
     case WSAEWOULDBLOCK:

From 6c6c976fa93be0aef731fc0884f784a9be81071e Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 26 Apr 2011 14:29:14 +0200
Subject: [PATCH 324/830] ffplay: Remove disabled code.

---
 ffplay.c | 66 +-------------------------------------------------------
 1 file changed, 1 insertion(+), 65 deletions(-)

diff --git a/ffplay.c b/ffplay.c
index c3dc970a1e..3bcab5452b 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -407,44 +407,6 @@ static inline void fill_rectangle(SDL_Surface *screen,
     SDL_FillRect(screen, &rect, color);
 }
 
-#if 0
-/* draw only the border of a rectangle */
-void fill_border(VideoState *s, int x, int y, int w, int h, int color)
-{
-    int w1, w2, h1, h2;
-
-    /* fill the background */
-    w1 = x;
-    if (w1 < 0)
-        w1 = 0;
-    w2 = s->width - (x + w);
-    if (w2 < 0)
-        w2 = 0;
-    h1 = y;
-    if (h1 < 0)
-        h1 = 0;
-    h2 = s->height - (y + h);
-    if (h2 < 0)
-        h2 = 0;
-    fill_rectangle(screen,
-                   s->xleft, s->ytop,
-                   w1, s->height,
-                   color);
-    fill_rectangle(screen,
-                   s->xleft + s->width - w2, s->ytop,
-                   w2, s->height,
-                   color);
-    fill_rectangle(screen,
-                   s->xleft + w1, s->ytop,
-                   s->width - w1 - w2, h1,
-                   color);
-    fill_rectangle(screen,
-                   s->xleft + w1, s->ytop + s->height - h2,
-                   s->width - w1 - w2, h2,
-                   color);
-}
-#endif
-
 #define ALPHA_BLEND(a, oldp, newp, s)\
 ((((oldp << s) * (255 - (a))) + (newp * (a))) / (255 << s))
 
@@ -747,23 +709,12 @@ static void video_image_display(VideoState *is)
         }
         x = (is->width - width) / 2;
         y = (is->height - height) / 2;
-        if (!is->no_background) {
-            /* fill the background */
-            //            fill_border(is, x, y, width, height, QERGB(0x00, 0x00, 0x00));
-        } else {
-            is->no_background = 0;
-        }
+        is->no_background = 0;
         rect.x = is->xleft + x;
         rect.y = is->ytop  + y;
         rect.w = width;
         rect.h = height;
         SDL_DisplayYUVOverlay(vp->bmp, &rect);
-    } else {
-#if 0
-        fill_rectangle(screen,
-                       is->xleft, is->ytop, is->width, is->height,
-                       QERGB(0x00, 0x00, 0x00));
-#endif
     }
 }
 
@@ -1503,10 +1454,6 @@ static int output_picture2(VideoState *is, AVFrame *src_frame, double pts1, int6
     frame_delay += src_frame->repeat_pict * (frame_delay * 0.5);
     is->video_clock += frame_delay;
 
-#if defined(DEBUG_SYNC) && 0
-    printf("frame_type=%c clock=%0.3f pts=%0.3f\n",
-           av_get_picture_type_char(src_frame->pict_type), pts, pts1);
-#endif
     return queue_picture(is, src_frame, pts, pos);
 }
 
@@ -1920,8 +1867,6 @@ static int subtitle_thread(void *arg)
         len1 = avcodec_decode_subtitle2(is->subtitle_st->codec,
                                     &sp->sub, &got_subtitle,
                                     pkt);
-//            if (len1 < 0)
-//                break;
         if (got_subtitle && sp->sub.format == 0) {
             sp->pts = pts;
 
@@ -1945,9 +1890,6 @@ static int subtitle_thread(void *arg)
             SDL_UnlockMutex(is->subpq_mutex);
         }
         av_free_packet(pkt);
-//        if (step)
-//            if (cur_stream)
-//                stream_pause(cur_stream);
     }
  the_end:
     return 0;
@@ -2270,8 +2212,6 @@ static int stream_component_open(VideoState *is, int stream_index)
         is->video_stream = stream_index;
         is->video_st = ic->streams[stream_index];
 
-//        is->video_current_pts_time = av_gettime();
-
         packet_queue_init(&is->videoq);
         is->video_tid = SDL_CreateThread(video_thread, is);
         break;
@@ -2718,10 +2658,6 @@ static void stream_cycle_channel(VideoState *is, int codec_type)
 static void toggle_full_screen(void)
 {
     is_full_screen = !is_full_screen;
-    if (!fs_screen_width) {
-        /* use default SDL method */
-//        SDL_WM_ToggleFullScreen(screen);
-    }
     video_open(cur_stream);
 }
 

From 25101cf5bde4e05e22cbf0c53d8c82cbf56d0a35 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 26 May 2011 14:40:29 +0200
Subject: [PATCH 325/830] doc: Drop hint at --enable-memalign-hack for MinGW,
 it is now autodetected.

---
 doc/general.texi | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/doc/general.texi b/doc/general.texi
index ce531ccdfb..94fdb1dec4 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -831,18 +831,6 @@ are listed below:
 Libav automatically passes @code{-fno-common} to the compiler to work around
 a GCC bug (see @url{http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37216}).
 
-Within the MSYS shell, configure and make with:
-
-@example
-./configure --enable-memalign-hack
-make
-make install
-@end example
-
-This will install @file{ffmpeg.exe} along with many other development files
-to @file{/usr/local}. You may specify another install path using the
-@code{--prefix} option in @file{configure}.
-
 Notes:
 
 @itemize

From e14574eb68271fb9d1e255950c5a304f36d9a6ba Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 26 May 2011 14:21:04 +0200
Subject: [PATCH 326/830] configure: Add -D_GNU_SOURCE to CPPFLAGS on OS/2.

The flag is required for some C99 math functions to be declared.
---
 configure | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configure b/configure
index c628ff65bb..d3e8764fb9 100755
--- a/configure
+++ b/configure
@@ -2477,6 +2477,7 @@ case $target_os in
     os/2*)
         ln_s="cp -f"
         objformat="aout"
+        add_cppflags -D_GNU_SOURCE
         add_ldflags -Zomf -Zbin-files -Zargs-wild -Zmap
         SHFLAGS='$(SUBDIR)$(NAME).def -Zdll -Zomf'
         FFSERVERLDFLAGS=""

From 59748689f6d32cdd24080947a494093800f5629f Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 26 May 2011 15:48:31 +0200
Subject: [PATCH 327/830] build: Remove generated .version file on distclean.

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index afd783da5b..73ada9a59d 100644
--- a/Makefile
+++ b/Makefile
@@ -168,7 +168,7 @@ clean:: testclean
 
 distclean::
 	$(RM) $(DISTCLEANSUFFIXES)
-	$(RM) version.h config.* libavutil/avconfig.h
+	$(RM) config.* .version version.h libavutil/avconfig.h
 
 config:
 	$(SRC_PATH)/configure $(value LIBAV_CONFIGURATION)

From e758573a887cfb1155e81499ca54f433127cf24e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 26 May 2011 10:36:47 -0400
Subject: [PATCH 328/830] swscale: fix compile on x86-32.

---
 libswscale/x86/swscale_template.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 0de7e94c97..fd59f763c5 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1019,7 +1019,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither), "m"(uv_off)
         );
 #endif

From 93681fbd5082a3af896b7a730dacdd27a3052406 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 26 May 2011 11:32:32 -0400
Subject: [PATCH 329/830] swscale: fix compile on ppc.

---
 libswscale/ppc/swscale_altivec_template.c | 30 ++++++++++++-----------
 libswscale/ppc/swscale_template.c         | 25 ++++++++++++-------
 libswscale/ppc/yuv2rgb_altivec.c          | 17 +++++++------
 libswscale/swscale_internal.h             |  7 +++---
 4 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec_template.c
index b123f70f85..eee7bdd0a5 100644
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec_template.c
@@ -86,9 +86,11 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
 }
 
 static inline void
-yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                      const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                      uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
+yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc,
+                      int lumFilterSize, const int16_t *chrFilter,
+                      const int16_t **chrUSrc, const int16_t **chrVSrc,
+                      int chrFilterSize, uint8_t *dest, uint8_t *uDest,
+                      uint8_t *vDest, int dstW, int chrDstW)
 {
     const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
     register int i, j;
@@ -159,22 +161,22 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
             vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
             vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter
 
-            perm = vec_lvsl(0, chrSrc[j]);
-            l1 = vec_ld(0, chrSrc[j]);
-            l1_V = vec_ld(2048 << 1, chrSrc[j]);
+            perm = vec_lvsl(0, chrUSrc[j]);
+            l1 = vec_ld(0, chrUSrc[j]);
+            l1_V = vec_ld(0, chrVSrc[j]);
 
             for (i = 0; i < (chrDstW - 7); i+=8) {
                 int offset = i << 2;
-                vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]);
-                vector signed short l2_V = vec_ld(((i + 2048) << 1) + 16, chrSrc[j]);
+                vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]);
+                vector signed short l2_V = vec_ld((i << 1) + 16, chrVSrc[j]);
 
                 vector signed int v1 = vec_ld(offset, u);
                 vector signed int v2 = vec_ld(offset + 16, u);
                 vector signed int v1_V = vec_ld(offset, v);
                 vector signed int v2_V = vec_ld(offset + 16, v);
 
-                vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7]
-                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+2048] ... chrSrc[j][i+2055]
+                vector signed short ls = vec_perm(l1, l2, perm); // chrUSrc[j][i] ... chrUSrc[j][i+7]
+                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrVSrc[j][i] ... chrVSrc[j][i+7]
 
                 vector signed int i1 = vec_mule(vChrFilter, ls);
                 vector signed int i2 = vec_mulo(vChrFilter, ls);
@@ -182,9 +184,9 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
                 vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
 
                 vector signed int vf1 = vec_mergeh(i1, i2);
-                vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
+                vector signed int vf2 = vec_mergel(i1, i2); // chrUSrc[j][i] * chrFilter[j] ... chrUSrc[j][i+7] * chrFilter[j]
                 vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
-                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
+                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j]
 
                 vector signed int vo1 = vec_add(v1, vf1);
                 vector signed int vo2 = vec_add(v2, vf2);
@@ -200,8 +202,8 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
                 l1_V = l2_V;
             }
             for ( ; i < chrDstW; i++) {
-                u[i] += chrSrc[j][i] * chrFilter[j];
-                v[i] += chrSrc[j][i + 2048] * chrFilter[j];
+                u[i] += chrUSrc[j][i] * chrFilter[j];
+                v[i] += chrVSrc[j][i] * chrFilter[j];
             }
         }
         altivec_packIntArrayToCharArray(u, uDest, chrDstW);
diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
index 7968177b52..0fe97a1114 100644
--- a/libswscale/ppc/swscale_template.c
+++ b/libswscale/ppc/swscale_template.c
@@ -24,21 +24,28 @@
 #endif
 
 #if COMPILE_TEMPLATE_ALTIVEC
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc, int chrFilterSize,
+                                    const int16_t **alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                    uint8_t *aDest, long dstW, long chrDstW)
 {
     yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
-                          chrFilter, chrSrc, chrFilterSize,
+                          chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                           dest, uDest, vDest, dstW, chrDstW);
 }
 
 /**
  * vertical scale YV12 to RGB
  */
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
+                                       const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                                       const int16_t **chrVSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest,
+                                       long dstW, long dstY)
 {
     /* The following list of supported dstFormat values should
        match what's found in the body of ff_yuv2packedX_altivec() */
@@ -47,11 +54,11 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
           c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
           c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
             ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
-                                   chrFilter, chrSrc, chrFilterSize,
+                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                                    dest, dstW, dstY);
     else
         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                       chrFilter, chrSrc, chrFilterSize,
+                       chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                        alpSrc, dest, dstW, dstY);
 }
 #endif
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 96c208a074..abd49c9e96 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -778,10 +778,11 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b
 
 
 void
-ff_yuv2packedX_altivec(SwsContext *c,
-                       const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                     uint8_t *dest, int dstW, int dstY)
+ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
+                       const int16_t **lumSrc, int lumFilterSize,
+                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                       const int16_t **chrVSrc, int chrFilterSize,
+                       uint8_t *dest, int dstW, int dstY)
 {
     int i,j;
     vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
@@ -816,9 +817,9 @@ ff_yuv2packedX_altivec(SwsContext *c,
         V = RND;
         /* extract 8 coeffs from U,V */
         for (j=0; j<chrFilterSize; j++) {
-            X  = vec_ld (0, &chrSrc[j][i/2]);
+            X  = vec_ld (0, &chrUSrc[j][i/2]);
             U  = vec_mradds (X, CCoeffs[j], U);
-            X  = vec_ld (0, &chrSrc[j][i/2+2048]);
+            X  = vec_ld (0, &chrVSrc[j][i/2]);
             V  = vec_mradds (X, CCoeffs[j], V);
         }
 
@@ -894,9 +895,9 @@ ff_yuv2packedX_altivec(SwsContext *c,
         V = RND;
         /* extract 8 coeffs from U,V */
         for (j=0; j<chrFilterSize; j++) {
-            X  = vec_ld (0, &chrSrc[j][i/2]);
+            X  = vec_ld (0, &chrUSrc[j][i/2]);
             U  = vec_mradds (X, CCoeffs[j], U);
-            X  = vec_ld (0, &chrSrc[j][i/2+2048]);
+            X  = vec_ld (0, &chrVSrc[j][i/2]);
             V  = vec_mradds (X, CCoeffs[j], V);
         }
 
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 5aad9cfdca..8ba0fe9090 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -333,9 +333,10 @@ SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c);
 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c);
 SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c);
 void ff_bfin_get_unscaled_swscale(SwsContext *c);
-void ff_yuv2packedX_altivec(SwsContext *c,
-                            const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                            const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
+                            const int16_t **lumSrc, int lumFilterSize,
+                            const int16_t *chrFilter, const int16_t **chrUSrc,
+                            const int16_t **chrVSrc, int chrFilterSize,
                             uint8_t *dest, int dstW, int dstY);
 
 const char *sws_format_name(enum PixelFormat format);

From aaea1490351bfd1e8f21cb4b7e4c25d82d090e94 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 17:48:52 +0200
Subject: [PATCH 330/830] lavdevice: mark v4l for removal on next major bump.

---
 libavdevice/alldevices.c | 2 ++
 libavdevice/avdevice.h   | 4 ++++
 libavdevice/v4l.c        | 5 +++++
 3 files changed, 11 insertions(+)

diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c
index 0e2cf6d12e..e7bfb027cd 100644
--- a/libavdevice/alldevices.c
+++ b/libavdevice/alldevices.c
@@ -47,7 +47,9 @@ void avdevice_register_all(void)
     REGISTER_INOUTDEV (OSS, oss);
     REGISTER_INOUTDEV (SNDIO, sndio);
     REGISTER_INDEV    (V4L2, v4l2);
+#if FF_API_V4L
     REGISTER_INDEV    (V4L, v4l);
+#endif
     REGISTER_INDEV    (VFWCAP, vfwcap);
     REGISTER_INDEV    (X11_GRAB_DEVICE, x11_grab_device);
 
diff --git a/libavdevice/avdevice.h b/libavdevice/avdevice.h
index d1e321bf3a..d82b26fda6 100644
--- a/libavdevice/avdevice.h
+++ b/libavdevice/avdevice.h
@@ -33,6 +33,10 @@
                                            LIBAVDEVICE_VERSION_MICRO)
 #define LIBAVDEVICE_BUILD       LIBAVDEVICE_VERSION_INT
 
+#ifndef FF_API_V4L
+#define FF_API_V4L              (LIBAVDEVICE_VERSION_MAJOR < 54)
+#endif
+
 /**
  * Return the LIBAVDEVICE_VERSION_INT constant.
  */
diff --git a/libavdevice/v4l.c b/libavdevice/v4l.c
index 54d0394ff5..6742667f05 100644
--- a/libavdevice/v4l.c
+++ b/libavdevice/v4l.c
@@ -19,6 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#if FF_API_V4L
+
 #undef __STRICT_ANSI__ //workaround due to broken kernel headers
 #include "config.h"
 #include "libavutil/rational.h"
@@ -83,6 +85,8 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     int j;
     int vformat_num = FF_ARRAY_ELEMS(video_formats);
 
+    av_log(s1, AV_LOG_WARNING, "V4L input device is deprecated and will be removed in the next release.");
+
     if (ap->time_base.den <= 0) {
         av_log(s1, AV_LOG_ERROR, "Wrong time base (%d)\n", ap->time_base.den);
         return -1;
@@ -368,3 +372,4 @@ AVInputFormat ff_v4l_demuxer = {
     .flags = AVFMT_NOFILE,
     .priv_class = &v4l_class,
 };
+#endif  /* FF_API_V4L */

From 7e637b70ecd425fbe1479066a5751f26b9fce8a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Thu, 26 May 2011 19:44:39 +0200
Subject: [PATCH 331/830] Fix compilation with YASM/NASM versions not
 supporting AVX.

---
 libavcodec/x86/fft_mmx.asm | 10 ++++++++++
 libavcodec/x86/fft_sse.c   |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index 2551b48df2..b9739d7d56 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -299,6 +299,8 @@ IF%1 mova  Z(1), m5
 
 INIT_YMM
 
+%ifdef HAVE_AVX
+
 align 16
 fft8_avx:
     mova      m0, Z(0)
@@ -388,6 +390,8 @@ fft32_interleave_avx:
     jg .deint_loop
     ret
 
+%endif
+
 INIT_XMM
 %define movdqa  movaps
 
@@ -543,8 +547,10 @@ INIT_YMM
 
 %define INTERL INTERL_AVX
 
+%ifdef HAVE_AVX
 DECL_PASS pass_avx, PASS_BIG 1
 DECL_PASS pass_interleave_avx, PASS_BIG 0
+%endif
 
 INIT_XMM
 
@@ -634,8 +640,10 @@ cglobal fft_dispatch%3%2, 2,5,8, z, nbits
     RET
 %endmacro ; DECL_FFT
 
+%ifdef HAVE_AVX
 DECL_FFT 6, _avx
 DECL_FFT 6, _avx, _interleave
+%endif
 DECL_FFT 5, _sse
 DECL_FFT 5, _sse, _interleave
 DECL_FFT 4, _3dn
@@ -847,4 +855,6 @@ DECL_IMDCT _sse, POSROTATESHUF
 
 INIT_YMM
 
+%ifdef HAVE_AVX
 DECL_IMDCT _avx, POSROTATESHUF_AVX
+%endif
diff --git a/libavcodec/x86/fft_sse.c b/libavcodec/x86/fft_sse.c
index 801dc1bc8a..2f727e7b81 100644
--- a/libavcodec/x86/fft_sse.c
+++ b/libavcodec/x86/fft_sse.c
@@ -30,10 +30,12 @@ void ff_fft_dispatch_sse(FFTComplex *z, int nbits);
 void ff_fft_dispatch_interleave_sse(FFTComplex *z, int nbits);
 void ff_fft_dispatch_interleave_avx(FFTComplex *z, int nbits);
 
+#if HAVE_AVX
 void ff_fft_calc_avx(FFTContext *s, FFTComplex *z)
 {
     ff_fft_dispatch_interleave_avx(z, s->nbits);
 }
+#endif
 
 void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
 {

From 6c117d43c968629758c1b652e9b3a60cda6dcbac Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 08:48:31 +0200
Subject: [PATCH 332/830] AVOptions: set string default values.

---
 libavutil/opt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavutil/opt.c b/libavutil/opt.c
index 74c39fee5f..4e25918ed1 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -443,8 +443,10 @@ void av_opt_set_defaults2(void *s, int mask, int flags)
             }
             break;
             case FF_OPT_TYPE_STRING:
+                av_set_string3(s, opt->name, opt->default_val.str, 1, NULL);
+                break;
             case FF_OPT_TYPE_BINARY:
-                /* Cannot set default for string as default_val is of type * double */
+                /* Cannot set default for binary */
             break;
             default:
                 av_log(s, AV_LOG_DEBUG, "AVOption type %d of option %s not implemented yet\n", opt->type, opt->name);

From 9749cdf7754e600ff50dc255519fdb7bb5aad800 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 06:59:08 +0200
Subject: [PATCH 333/830] lavf: initialize demuxer private options.

---
 libavformat/utils.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index 89ab1086fe..b844ab95bc 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -424,6 +424,10 @@ int av_open_input_stream(AVFormatContext **ic_ptr,
             err = AVERROR(ENOMEM);
             goto fail;
         }
+        if (fmt->priv_class) {
+            *(const AVClass**)ic->priv_data = fmt->priv_class;
+            av_opt_set_defaults(ic->priv_data);
+        }
     } else {
         ic->priv_data = NULL;
     }

From e2d7dc87df0c1e261e4a3fe082d192a7a798d4d5 Mon Sep 17 00:00:00 2001
From: Mike Williams <mike@mikebwilliams.com>
Date: Wed, 18 May 2011 09:14:22 -0400
Subject: [PATCH 334/830] ffserver: move close_connection() call to avoid a
 temporary string and copy.

Signed-off-by: Mike Williams <mike@mikebwilliams.com>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 ffserver.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/ffserver.c b/ffserver.c
index b95a854363..56bcba58f7 100644
--- a/ffserver.c
+++ b/ffserver.c
@@ -3274,7 +3274,6 @@ static void rtsp_cmd_pause(HTTPContext *c, const char *url, RTSPMessageHeader *h
 static void rtsp_cmd_teardown(HTTPContext *c, const char *url, RTSPMessageHeader *h)
 {
     HTTPContext *rtp_c;
-    char session_id[32];
 
     rtp_c = find_rtp_session_with_url(url, h->session_id);
     if (!rtp_c) {
@@ -3282,16 +3281,14 @@ static void rtsp_cmd_teardown(HTTPContext *c, const char *url, RTSPMessageHeader
         return;
     }
 
-    av_strlcpy(session_id, rtp_c->session_id, sizeof(session_id));
-
-    /* abort the session */
-    close_connection(rtp_c);
-
     /* now everything is OK, so we can send the connection parameters */
     rtsp_reply_header(c, RTSP_STATUS_OK);
     /* session ID */
-    avio_printf(c->pb, "Session: %s\r\n", session_id);
+    avio_printf(c->pb, "Session: %s\r\n", rtp_c->session_id);
     avio_printf(c->pb, "\r\n");
+
+    /* abort the session */
+    close_connection(rtp_c);
 }
 
 

From 3d96c13e437e77a3220146aa466d89df52871fbd Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 20:32:05 +0200
Subject: [PATCH 335/830] v4l: include avdevice.h

Fixes build.
---
 libavdevice/v4l.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavdevice/v4l.c b/libavdevice/v4l.c
index 6742667f05..1be7adf49d 100644
--- a/libavdevice/v4l.c
+++ b/libavdevice/v4l.c
@@ -19,6 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "avdevice.h"
+
 #if FF_API_V4L
 
 #undef __STRICT_ANSI__ //workaround due to broken kernel headers

From 74cc8c52ed42e269715e128d5aa0708b9f7ec463 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 25 May 2011 18:43:28 +0100
Subject: [PATCH 336/830] ARM: fix av_clipl_int32_arm()

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/arm/intmath.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h
index 2f651d1875..155d1a877d 100644
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h
@@ -93,6 +93,7 @@ static av_always_inline av_const int32_t av_clipl_int32_arm(int64_t a)
     int x, y;
     __asm__ volatile ("adds   %1, %R2, %Q2, lsr #31  \n\t"
                       "mvnne  %1, #1<<31             \n\t"
+                      "moveq  %0, %Q2                \n\t"
                       "eorne  %0, %1,  %R2, asr #31  \n\t"
                       : "=r"(x), "=&r"(y) : "r"(a));
     return x;

From 77cd6efc33f45a5f7dbb37e10f49f55e9fe3c479 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 24 May 2011 15:45:50 +0100
Subject: [PATCH 337/830] ARM: remove volatile from asm statements in
 libavutil/intmath

The volatile qualifiers are not needed on these statements as
their effects are fully specified by constraints.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/arm/intmath.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h
index 155d1a877d..5f85bfa603 100644
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h
@@ -34,11 +34,11 @@
 static av_always_inline av_const int FASTDIV(int a, int b)
 {
     int r, t;
-    __asm__ volatile("cmp     %3, #2               \n\t"
-                     "ldr     %1, [%4, %3, lsl #2] \n\t"
-                     "lsrle   %0, %2, #1           \n\t"
-                     "smmulgt %0, %1, %2           \n\t"
-                     : "=&r"(r), "=&r"(t) : "r"(a), "r"(b), "r"(ff_inverse));
+    __asm__ ("cmp     %3, #2               \n\t"
+             "ldr     %1, [%4, %3, lsl #2] \n\t"
+             "lsrle   %0, %2, #1           \n\t"
+             "smmulgt %0, %1, %2           \n\t"
+             : "=&r"(r), "=&r"(t) : "r"(a), "r"(b), "r"(ff_inverse));
     return r;
 }
 
@@ -46,7 +46,7 @@ static av_always_inline av_const int FASTDIV(int a, int b)
 static av_always_inline av_const uint8_t av_clip_uint8_arm(int a)
 {
     unsigned x;
-    __asm__ volatile ("usat %0, #8,  %1" : "=r"(x) : "r"(a));
+    __asm__ ("usat %0, #8,  %1" : "=r"(x) : "r"(a));
     return x;
 }
 
@@ -54,7 +54,7 @@ static av_always_inline av_const uint8_t av_clip_uint8_arm(int a)
 static av_always_inline av_const uint8_t av_clip_int8_arm(int a)
 {
     unsigned x;
-    __asm__ volatile ("ssat %0, #8,  %1" : "=r"(x) : "r"(a));
+    __asm__ ("ssat %0, #8,  %1" : "=r"(x) : "r"(a));
     return x;
 }
 
@@ -62,7 +62,7 @@ static av_always_inline av_const uint8_t av_clip_int8_arm(int a)
 static av_always_inline av_const uint16_t av_clip_uint16_arm(int a)
 {
     unsigned x;
-    __asm__ volatile ("usat %0, #16, %1" : "=r"(x) : "r"(a));
+    __asm__ ("usat %0, #16, %1" : "=r"(x) : "r"(a));
     return x;
 }
 
@@ -70,7 +70,7 @@ static av_always_inline av_const uint16_t av_clip_uint16_arm(int a)
 static av_always_inline av_const int16_t av_clip_int16_arm(int a)
 {
     int x;
-    __asm__ volatile ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
+    __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
     return x;
 }
 
@@ -80,8 +80,8 @@ static av_always_inline av_const int16_t av_clip_int16_arm(int a)
 static av_always_inline av_const int FASTDIV(int a, int b)
 {
     int r, t;
-    __asm__ volatile("umull %1, %0, %2, %3"
-                     : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b]));
+    __asm__ ("umull %1, %0, %2, %3"
+             : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b]));
     return r;
 }
 
@@ -91,11 +91,11 @@ static av_always_inline av_const int FASTDIV(int a, int b)
 static av_always_inline av_const int32_t av_clipl_int32_arm(int64_t a)
 {
     int x, y;
-    __asm__ volatile ("adds   %1, %R2, %Q2, lsr #31  \n\t"
-                      "mvnne  %1, #1<<31             \n\t"
-                      "moveq  %0, %Q2                \n\t"
-                      "eorne  %0, %1,  %R2, asr #31  \n\t"
-                      : "=r"(x), "=&r"(y) : "r"(a));
+    __asm__ ("adds   %1, %R2, %Q2, lsr #31  \n\t"
+             "mvnne  %1, #1<<31             \n\t"
+             "moveq  %0, %Q2                \n\t"
+             "eorne  %0, %1,  %R2, asr #31  \n\t"
+             : "=r"(x), "=&r"(y) : "r"(a));
     return x;
 }
 

From ca7d8256e32e4dbafadc54a65b441945ac759ca9 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 24 May 2011 15:46:28 +0100
Subject: [PATCH 338/830] ARM: add ARMv6 optimised av_clip_uintp2

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/arm/intmath.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h
index 5f85bfa603..cc3de90f04 100644
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h
@@ -74,6 +74,15 @@ static av_always_inline av_const int16_t av_clip_int16_arm(int a)
     return x;
 }
 
+#define av_clip_uintp2 av_clip_uintp2_arm
+static av_always_inline av_const unsigned av_clip_uintp2_arm(int a, int p)
+{
+    unsigned x;
+    __asm__ ("usat %0, %2, %1" : "=r"(x) : "r"(a), "i"(p));
+    return x;
+}
+
+
 #else /* HAVE_ARMV6 */
 
 #define FASTDIV FASTDIV

From 5655469ee73bc7f5a975a909738a764b9be7949b Mon Sep 17 00:00:00 2001
From: JULIAN GARDNER <joolzg@btinternet.com>
Date: Thu, 26 May 2011 15:32:14 +0200
Subject: [PATCH 339/830] libx264: support aspect ratio switch

---
 libavcodec/libx264.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index d9bac17484..28683a930a 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -119,6 +119,12 @@ static int X264_frame(AVCodecContext *ctx, uint8_t *buf,
             x4->params.b_tff = frame->top_field_first;
             x264_encoder_reconfig(x4->enc, &x4->params);
         }
+        if (x4->params.vui.i_sar_height != ctx->sample_aspect_ratio.den
+         || x4->params.vui.i_sar_width != ctx->sample_aspect_ratio.num) {
+            x4->params.vui.i_sar_height = ctx->sample_aspect_ratio.den;
+            x4->params.vui.i_sar_width = ctx->sample_aspect_ratio.num;
+            x264_encoder_reconfig(x4->enc, &x4->params);
+        }
     }
 
     do {

From 2b6bfff2b21f07c5455ef873cc9331a1b7fbf83c Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 26 May 2011 15:32:33 +0200
Subject: [PATCH 340/830] swscale: Do not lose precision on yuv values after
 rgb->yuv.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale.c              | 74 +++++++++++++++----------------
 libswscale/swscale_template.c     | 43 ++++++++++--------
 libswscale/utils.c                |  2 +-
 libswscale/x86/swscale_template.c | 46 +++++++++----------
 libswscale/x86/swscale_template.h |  4 +-
 tests/ref/lavf/pixfmt             | 10 ++---
 tests/ref/lavfi/pixfmts_scale_le  | 32 ++++++-------
 tests/ref/vsynth1/flashsv         |  4 +-
 tests/ref/vsynth1/jpegls          |  4 +-
 tests/ref/vsynth1/msvideo1        |  4 +-
 tests/ref/vsynth1/qtrle           |  4 +-
 tests/ref/vsynth1/rgb             |  4 +-
 tests/ref/vsynth2/flashsv         |  4 +-
 tests/ref/vsynth2/jpegls          |  4 +-
 tests/ref/vsynth2/msvideo1        |  4 +-
 tests/ref/vsynth2/qtrle           |  4 +-
 tests/ref/vsynth2/rgb             |  4 +-
 17 files changed, 128 insertions(+), 123 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 63a3f81335..84926635c3 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1045,7 +1045,7 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
     }
 }
 
-static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, long width,
+static inline void rgb48ToY(int16_t *dst, const uint8_t *src, long width,
                             uint32_t *unused)
 {
     int i;
@@ -1054,11 +1054,11 @@ static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, long width,
         int g = src[i*6+2];
         int b = src[i*6+4];
 
-        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+        dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
     }
 }
 
-static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
+static inline void rgb48ToUV(int16_t *dstU, int16_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              long width, uint32_t *unused)
 {
@@ -1069,12 +1069,12 @@ static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
         int g = src1[6*i + 2];
         int b = src1[6*i + 4];
 
-        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+        dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
+        dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
     }
 }
 
-static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
+static inline void rgb48ToUV_half(int16_t *dstU, int16_t *dstV,
                                   const uint8_t *src1, const uint8_t *src2,
                                   long width, uint32_t *unused)
 {
@@ -1085,12 +1085,12 @@ static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
         int g= src1[12*i + 2] + src1[12*i + 8];
         int b= src1[12*i + 4] + src1[12*i + 10];
 
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
+        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
     }
 }
 
-static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, long width,
+static inline void bgr48ToY(int16_t *dst, const uint8_t *src, long width,
                             uint32_t *unused)
 {
     int i;
@@ -1099,11 +1099,11 @@ static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, long width,
         int g = src[i*6+2];
         int r = src[i*6+4];
 
-        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+        dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
     }
 }
 
-static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
+static inline void bgr48ToUV(int16_t *dstU, int16_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              long width, uint32_t *unused)
 {
@@ -1113,12 +1113,12 @@ static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
         int g = src1[6*i + 2];
         int r = src1[6*i + 4];
 
-        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+        dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
+        dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
     }
 }
 
-static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
+static inline void bgr48ToUV_half(int16_t *dstU, int16_t *dstV,
                                   const uint8_t *src1, const uint8_t *src2,
                                   long width, uint32_t *unused)
 {
@@ -1128,13 +1128,13 @@ static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
         int g= src1[12*i + 2] + src1[12*i + 8];
         int r= src1[12*i + 4] + src1[12*i + 10];
 
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
+        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
     }
 }
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\
+static inline void name(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1142,7 +1142,7 @@ static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *
         int g= (((const type*)src)[i]>>shg)&maskg;\
         int r= (((const type*)src)[i]>>shr)&maskr;\
 \
-        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
+        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (32<<((S)-1)) + (1<<(S-7)))>>((S)-6));\
     }\
 }
 
@@ -1155,16 +1155,16 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
 
-static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void abgrToA(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
-        dst[i]= src[4*i];
+        dst[i]= src[4*i]<<6;
     }
 }
 
 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
-static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1172,11 +1172,11 @@ static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const
         int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
         int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
 \
-        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
-        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
+        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\
+        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\
     }\
 }\
-static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name ## _half(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1189,8 +1189,8 @@ static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *sr
 \
         g>>=shg;\
 \
-        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
-        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
+        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (256U<<(S)) + (1<<(S-6)))>>((S)-6+1);\
+        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256U<<(S)) + (1<<(S-6)))>>((S)-6+1);\
     }\
 }
 
@@ -1203,27 +1203,27 @@ BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<
 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
 
-static inline void palToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
+static inline void palToA(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
 {
     int i;
     for (i=0; i<width; i++) {
         int d= src[i];
 
-        dst[i]= pal[d] >> 24;
+        dst[i]= (pal[d] >> 24)<<6;
     }
 }
 
-static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
+static inline void palToY(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
 {
     int i;
     for (i=0; i<width; i++) {
         int d= src[i];
 
-        dst[i]= pal[d] & 0xFF;
+        dst[i]= (pal[d] & 0xFF)<<6;
     }
 }
 
-static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
+static inline void palToUV(uint16_t *dstU, int16_t *dstV,
                            const uint8_t *src1, const uint8_t *src2,
                            long width, uint32_t *pal)
 {
@@ -1232,28 +1232,28 @@ static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
     for (i=0; i<width; i++) {
         int p= pal[src1[i]];
 
-        dstU[i]= p>>8;
-        dstV[i]= p>>16;
+        dstU[i]= (uint8_t)(p>> 8)<<6;
+        dstV[i]= (uint8_t)(p>>16)<<6;
     }
 }
 
-static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monowhite2Y(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
         int d= ~src[i];
         for(j=0; j<8; j++)
-            dst[8*i+j]= ((d>>(7-j))&1)*255;
+            dst[8*i+j]= ((d>>(7-j))&1)*16383;
     }
 }
 
-static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monoblack2Y(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
         int d= src[i];
         for(j=0; j<8; j++)
-            dst[8*i+j]= ((d>>(7-j))&1)*255;
+            dst[8*i+j]= ((d>>(7-j))&1)*16383;
     }
 }
 
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 4bb7bf2dad..8bf38b5946 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -254,7 +254,7 @@ static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
     nvXXtoUV_c(dstV, dstU, src1, width);
 }
 
-static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
+static inline void bgr24ToY_c(int16_t *dst, const uint8_t *src,
                               long width, uint32_t *unused)
 {
     int i;
@@ -263,11 +263,11 @@ static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
         int g= src[i*3+1];
         int r= src[i*3+2];
 
-        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+        dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
     }
 }
 
-static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static inline void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
                                const uint8_t *src2, long width, uint32_t *unused)
 {
     int i;
@@ -276,13 +276,13 @@ static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1
         int g= src1[3*i + 1];
         int r= src1[3*i + 2];
 
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
+        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
     }
     assert(src1 == src2);
 }
 
-static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static inline void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
                                     const uint8_t *src2, long width, uint32_t *unused)
 {
     int i;
@@ -291,13 +291,13 @@ static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t
         int g= src1[6*i + 1] + src1[6*i + 4];
         int r= src1[6*i + 2] + src1[6*i + 5];
 
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+        dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
+        dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
     }
     assert(src1 == src2);
 }
 
-static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
+static inline void rgb24ToY_c(int16_t *dst, const uint8_t *src, long width,
                               uint32_t *unused)
 {
     int i;
@@ -306,11 +306,11 @@ static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
         int g= src[i*3+1];
         int b= src[i*3+2];
 
-        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+        dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
     }
 }
 
-static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static inline void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
                                const uint8_t *src2, long width, uint32_t *unused)
 {
     int i;
@@ -320,12 +320,12 @@ static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1
         int g= src1[3*i + 1];
         int b= src1[3*i + 2];
 
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
+        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
     }
 }
 
-static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static inline void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
                                     const uint8_t *src2, long width, uint32_t *unused)
 {
     int i;
@@ -335,8 +335,8 @@ static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t
         int g= src1[6*i + 1] + src1[6*i + 4];
         int b= src1[6*i + 2] + src1[6*i + 5];
 
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+        dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
+        dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
     }
 }
 
@@ -455,7 +455,8 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
     }
 
     if (c->hScale16) {
-        c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+        int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+        c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
     } else if (!c->hyscale_fast) {
         c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
     } else { // fast bilinear upscale / crap downscale
@@ -502,8 +503,9 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
     }
 
     if (c->hScale16) {
-        c->hScale16(dst     , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
-        c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+        int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+        c->hScale16(dst     , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
+        c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
     } else if (!c->hcscale_fast) {
         c->hScale(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
         c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
@@ -959,6 +961,9 @@ static void sws_init_swScale_c(SwsContext *c)
         }
     }
 
+    if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8)
+        c->hScale16= hScale16_c;
+
     switch (srcFormat) {
     case PIX_FMT_GRAY8A :
         c->alpSrcOffset = 1;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index fada19210b..5eac356340 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -860,7 +860,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
             if (flags&SWS_PRINT_INFO)
                 av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
         }
-        if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat)) c->canMMX2BeUsed=0;
+        if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) c->canMMX2BeUsed=0;
     }
     else
         c->canMMX2BeUsed=0;
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index ffc01c5e66..99b4413f90 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1615,7 +1615,7 @@ static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
     RENAME(nvXXtoUV)(dstV, dstU, src1, width);
 }
 
-static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
 {
 
     if(srcFormat == PIX_FMT_BGR24) {
@@ -1655,20 +1655,19 @@ static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long w
         "paddd                   %%mm3, %%mm2       \n\t"
         "paddd                   %%mm4, %%mm0       \n\t"
         "paddd                   %%mm4, %%mm2       \n\t"
-        "psrad                     $15, %%mm0       \n\t"
-        "psrad                     $15, %%mm2       \n\t"
+        "psrad                     $9, %%mm0       \n\t"
+        "psrad                     $9, %%mm2       \n\t"
         "packssdw                %%mm2, %%mm0       \n\t"
-        "packuswb                %%mm0, %%mm0       \n\t"
-        "movd                %%mm0, (%1, %%"REG_a") \n\t"
-        "add                        $4, %%"REG_a"   \n\t"
+        "movq                %%mm0, (%1, %%"REG_a") \n\t"
+        "add                        $8, %%"REG_a"   \n\t"
         " js                        1b              \n\t"
     : "+r" (src)
-    : "r" (dst+width), "g" ((x86_reg)-width)
+    : "r" (dst+width), "g" ((x86_reg)-2*width)
     : "%"REG_a
     );
 }
 
-static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
 {
     __asm__ volatile(
         "movq                    24(%4), %%mm6       \n\t"
@@ -1708,41 +1707,39 @@ static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uin
         "paddd                   %%mm3, %%mm2       \n\t"
         "paddd                   %%mm3, %%mm1       \n\t"
         "paddd                   %%mm3, %%mm4       \n\t"
-        "psrad                     $15, %%mm0       \n\t"
-        "psrad                     $15, %%mm2       \n\t"
-        "psrad                     $15, %%mm1       \n\t"
-        "psrad                     $15, %%mm4       \n\t"
+        "psrad                     $9, %%mm0       \n\t"
+        "psrad                     $9, %%mm2       \n\t"
+        "psrad                     $9, %%mm1       \n\t"
+        "psrad                     $9, %%mm4       \n\t"
         "packssdw                %%mm1, %%mm0       \n\t"
         "packssdw                %%mm4, %%mm2       \n\t"
-        "packuswb                %%mm0, %%mm0       \n\t"
-        "packuswb                %%mm2, %%mm2       \n\t"
-        "movd                %%mm0, (%1, %%"REG_a") \n\t"
-        "movd                %%mm2, (%2, %%"REG_a") \n\t"
-        "add                        $4, %%"REG_a"   \n\t"
+        "movq                %%mm0, (%1, %%"REG_a") \n\t"
+        "movq                %%mm2, (%2, %%"REG_a") \n\t"
+        "add                        $8, %%"REG_a"   \n\t"
         " js                        1b              \n\t"
     : "+r" (src)
-    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
+    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-2*width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
     : "%"REG_a
     );
 }
 
-static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
 {
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
 }
 
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
 {
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
     assert(src1 == src2);
 }
 
-static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
 {
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
 }
 
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
 {
     assert(src1==src2);
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
@@ -2323,7 +2320,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         case PIX_FMT_YUV420P16LE:
         case PIX_FMT_YUV422P16LE:
         case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break;
-    }   
+    }
     if (!c->chrSrcHSubSample) {
         switch(srcFormat) {
         case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV); break;
@@ -2348,4 +2345,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         default: break;
         }
     }
+
+    if(isAnyRGB(c->srcFormat))
+        c->hScale16= RENAME(hScale16);
 }
diff --git a/libswscale/x86/swscale_template.h b/libswscale/x86/swscale_template.h
index 74e12c75af..f746c56fbe 100644
--- a/libswscale/x86/swscale_template.h
+++ b/libswscale/x86/swscale_template.h
@@ -67,13 +67,13 @@ DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
 DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
 DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008010000080100ULL;
 
 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
     {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
     {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
 };
 
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040010000400100ULL;
 
 #endif /* SWSCALE_X86_SWSCALE_TEMPLATE_H */
diff --git a/tests/ref/lavf/pixfmt b/tests/ref/lavf/pixfmt
index 186dde5ed3..a6618390e4 100644
--- a/tests/ref/lavf/pixfmt
+++ b/tests/ref/lavf/pixfmt
@@ -16,15 +16,15 @@ e176bd14185788110e055f945de7f95f *./tests/data/pixfmt/yuvj420p.yuv
 304128 ./tests/data/pixfmt/yuvj422p.yuv
 c10442da177c9f1d12be3c53be6fa12c *./tests/data/pixfmt/yuvj444p.yuv
 304128 ./tests/data/pixfmt/yuvj444p.yuv
-c6e0f9b5817f484b175c1ec4ffb4e9c9 *./tests/data/pixfmt/rgb24.yuv
+6bb61113e7b70eb09dbcec356122a0e2 *./tests/data/pixfmt/rgb24.yuv
 304128 ./tests/data/pixfmt/rgb24.yuv
-c6e0f9b5817f484b175c1ec4ffb4e9c9 *./tests/data/pixfmt/bgr24.yuv
+6bb61113e7b70eb09dbcec356122a0e2 *./tests/data/pixfmt/bgr24.yuv
 304128 ./tests/data/pixfmt/bgr24.yuv
-c6e0f9b5817f484b175c1ec4ffb4e9c9 *./tests/data/pixfmt/rgb32.yuv
+6bb61113e7b70eb09dbcec356122a0e2 *./tests/data/pixfmt/rgb32.yuv
 304128 ./tests/data/pixfmt/rgb32.yuv
-66d39d464bd89ded2a124897f0a75ade *./tests/data/pixfmt/rgb565.yuv
+efa7c0337cc00c796c6df615223716f1 *./tests/data/pixfmt/rgb565.yuv
 304128 ./tests/data/pixfmt/rgb565.yuv
-c894c3bd8d2631ed1964500b90a0c350 *./tests/data/pixfmt/rgb555.yuv
+0df2a477af1415a1b8fbf2a3e552bc39 *./tests/data/pixfmt/rgb555.yuv
 304128 ./tests/data/pixfmt/rgb555.yuv
 6be306b0cce5f8e6c271ea17fef9745b *./tests/data/pixfmt/gray.yuv
 304128 ./tests/data/pixfmt/gray.yuv
diff --git a/tests/ref/lavfi/pixfmts_scale_le b/tests/ref/lavfi/pixfmts_scale_le
index 29d1b2c340..a66138a8a3 100644
--- a/tests/ref/lavfi/pixfmts_scale_le
+++ b/tests/ref/lavfi/pixfmts_scale_le
@@ -1,28 +1,28 @@
-abgr                d894cb97f6c80eb21bdbe8a4eea62d86
-argb                54346f2b2eef10919e0f247241df3b24
-bgr24               570f8d6b51a838aed022ef67535f6bdc
-bgr48be             fcc0f2dbf45d325f84f816c74cbeeebe
-bgr48le             3f9c2b23eed3b8d196d1c14b38ce50f5
+abgr                cff82561a074874027ac1cc896fd2730
+argb                756dd1eaa5baca2238ce23dbdc452684
+bgr24               e44192347a45586c6c157e3059610cd1
+bgr48be             62e6043fbe9734e63ad679999ca8011c
+bgr48le             61237dad4fa5f3e9109db85f53cd25d9
 bgr4_byte           ee1d35a7baf8e9016891929a2f565c0b
-bgr555le            36b745067197f9ca8c1731cac51329c9
-bgr565le            3a514a298c6161a071ddf9963c06509d
+bgr555le            41e3e0961478dc634bf68a7bbd670cc9
+bgr565le            614897eaeb422bd9a972f8ee51909be5
 bgr8                7f007fa6c153a16e808a9c51605a4016
-bgra                a5e7040f9a80cccd65e5acf2ca09ace5
+bgra                01cfdda1f72fcabb6c46424e27f8c519
 gray                d7786a7d9d99ac74230cc045cab5632c
 gray16be            5ba22d4802b40ec27e62abb22ad1d1cc
 gray16le            2d5e83aa875a4c3baa6fecf55e3223bf
-monob               88c4c050758e64d120f50c7eff694381
-monow               d31772ebaa877fc2a78565937f7f9673
+monob               cb62f31b701c6e987b574974d1b31e32
+monow               fd5d417ab7728acddffc06870661df61
 nv12                4676d59db43d657dc12841f6bc3ab452
 nv21                69c699510ff1fb777b118ebee1002f14
-rgb24               514692e28e8ff6860e415ce4fcf6eb8c
-rgb48be             1894cd30dabcd3180518e4d5f09f25e7
-rgb48le             1354e6e27ce3c1d4d4989ee56030c94b
+rgb24               13ff53ebeab74dc05492836f1cfbd2c1
+rgb48be             7f6b1f8139c6a64eadf9dfa867ac20e8
+rgb48le             2756d8710c152cbc367656de4d0f1b76
 rgb4_byte           d81ffd3add95842a618eec81024f0b5c
-rgb555le            a350ef1dc2c9688ed49e7ba018843795
-rgb565le            6f4bb711238baa762d73305213f8d035
+rgb555le            bd698d86c03170c4a16607c0fd1f750f
+rgb565le            bfa0c639d80c3c03fd0c9e5f34296a5e
 rgb8                091d0170b354ef0e97312b95feb5483f
-rgba                a3d362f222098a00e63867f612018659
+rgba                16873e3ac914e76116629a5ff8940ac4
 uyvy422             314bd486277111a95d9369b944fa0400
 yuv410p             7df8f6d69b56a8dcb6c7ee908e5018b5
 yuv411p             1143e7c5cc28fe0922b051b17733bc4c
diff --git a/tests/ref/vsynth1/flashsv b/tests/ref/vsynth1/flashsv
index 7920193aa7..77fefe0bc0 100644
--- a/tests/ref/vsynth1/flashsv
+++ b/tests/ref/vsynth1/flashsv
@@ -1,4 +1,4 @@
 97894502b4cb57aca1105b6333f72dae *./tests/data/vsynth1/flashsv.flv
 14681925 ./tests/data/vsynth1/flashsv.flv
-947cb24ec45a453348ae6fe3fa278071 *./tests/data/flashsv.vsynth1.out.yuv
-stddev:    2.85 PSNR: 39.03 MAXDIFF:   49 bytes:  7603200/  7603200
+791e1fb999deb2e4156e2286d48c4ed1 *./tests/data/flashsv.vsynth1.out.yuv
+stddev:    2.84 PSNR: 39.04 MAXDIFF:   49 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth1/jpegls b/tests/ref/vsynth1/jpegls
index 636f7fc556..153f9b9c0f 100644
--- a/tests/ref/vsynth1/jpegls
+++ b/tests/ref/vsynth1/jpegls
@@ -1,4 +1,4 @@
 519e26bb1ac0f3db8f90b36537f2f760 *./tests/data/vsynth1/jpegls.avi
 9089812 ./tests/data/vsynth1/jpegls.avi
-947cb24ec45a453348ae6fe3fa278071 *./tests/data/jpegls.vsynth1.out.yuv
-stddev:    2.85 PSNR: 39.03 MAXDIFF:   49 bytes:  7603200/  7603200
+791e1fb999deb2e4156e2286d48c4ed1 *./tests/data/jpegls.vsynth1.out.yuv
+stddev:    2.84 PSNR: 39.04 MAXDIFF:   49 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth1/msvideo1 b/tests/ref/vsynth1/msvideo1
index 479234d846..7978258c99 100644
--- a/tests/ref/vsynth1/msvideo1
+++ b/tests/ref/vsynth1/msvideo1
@@ -1,4 +1,4 @@
 267a152a73cbc5ac4694a6e3b254be34 *./tests/data/vsynth1/msvideo1.avi
 2162264 ./tests/data/vsynth1/msvideo1.avi
-cf15dd12b8347567ae350383bf4ef4bb *./tests/data/msvideo1.vsynth1.out.yuv
-stddev:   11.81 PSNR: 26.68 MAXDIFF:  151 bytes:  7603200/  7603200
+c0665fac1bd896b6fe7fe0eead805bd5 *./tests/data/msvideo1.vsynth1.out.yuv
+stddev:   11.80 PSNR: 26.69 MAXDIFF:  151 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth1/qtrle b/tests/ref/vsynth1/qtrle
index 9988897b91..d25b986184 100644
--- a/tests/ref/vsynth1/qtrle
+++ b/tests/ref/vsynth1/qtrle
@@ -1,4 +1,4 @@
 d14041925ce5ec5001dc519276b1a1ab *./tests/data/vsynth1/qtrle.mov
 15263232 ./tests/data/vsynth1/qtrle.mov
-243325fb2cae1a9245efd49aff936327 *./tests/data/qtrle.vsynth1.out.yuv
-stddev:    3.42 PSNR: 37.43 MAXDIFF:   48 bytes:  7603200/  7603200
+93695a27c24a61105076ca7b1f010bbd *./tests/data/qtrle.vsynth1.out.yuv
+stddev:    3.42 PSNR: 37.44 MAXDIFF:   48 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth1/rgb b/tests/ref/vsynth1/rgb
index 10a0a13329..1c353dae7f 100644
--- a/tests/ref/vsynth1/rgb
+++ b/tests/ref/vsynth1/rgb
@@ -1,4 +1,4 @@
 05f0719cb52486d9a4beb9cfae3f2571 *./tests/data/vsynth1/rgb.avi
 15213260 ./tests/data/vsynth1/rgb.avi
-243325fb2cae1a9245efd49aff936327 *./tests/data/rgb.vsynth1.out.yuv
-stddev:    3.42 PSNR: 37.43 MAXDIFF:   48 bytes:  7603200/  7603200
+93695a27c24a61105076ca7b1f010bbd *./tests/data/rgb.vsynth1.out.yuv
+stddev:    3.42 PSNR: 37.44 MAXDIFF:   48 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth2/flashsv b/tests/ref/vsynth2/flashsv
index bfbb9e1c08..0b7b7d925e 100644
--- a/tests/ref/vsynth2/flashsv
+++ b/tests/ref/vsynth2/flashsv
@@ -1,4 +1,4 @@
 0667077971e0cb63b5f49c580006e90e *./tests/data/vsynth2/flashsv.flv
 12368953 ./tests/data/vsynth2/flashsv.flv
-592b3321994e26a990deb3a0a1415de9 *./tests/data/flashsv.vsynth2.out.yuv
-stddev:    0.65 PSNR: 51.84 MAXDIFF:   14 bytes:  7603200/  7603200
+3a984506f1ebfc9fb73b6814cab201cc *./tests/data/flashsv.vsynth2.out.yuv
+stddev:    0.66 PSNR: 51.73 MAXDIFF:   14 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth2/jpegls b/tests/ref/vsynth2/jpegls
index e7fa2df46c..229937ec80 100644
--- a/tests/ref/vsynth2/jpegls
+++ b/tests/ref/vsynth2/jpegls
@@ -1,4 +1,4 @@
 4fc53937f048c900ae6d50fda9dba206 *./tests/data/vsynth2/jpegls.avi
 8334630 ./tests/data/vsynth2/jpegls.avi
-592b3321994e26a990deb3a0a1415de9 *./tests/data/jpegls.vsynth2.out.yuv
-stddev:    0.65 PSNR: 51.84 MAXDIFF:   14 bytes:  7603200/  7603200
+3a984506f1ebfc9fb73b6814cab201cc *./tests/data/jpegls.vsynth2.out.yuv
+stddev:    0.66 PSNR: 51.73 MAXDIFF:   14 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth2/msvideo1 b/tests/ref/vsynth2/msvideo1
index b630b159e4..f71be35278 100644
--- a/tests/ref/vsynth2/msvideo1
+++ b/tests/ref/vsynth2/msvideo1
@@ -1,4 +1,4 @@
 5dddbbd6616d9be4bc0fd0c9650bd9e3 *./tests/data/vsynth2/msvideo1.avi
 1259308 ./tests/data/vsynth2/msvideo1.avi
-e2e7a952135f6307a74f2e178dc0df20 *./tests/data/msvideo1.vsynth2.out.yuv
-stddev:    7.42 PSNR: 30.71 MAXDIFF:  123 bytes:  7603200/  7603200
+cd83ffcbc73573044e3aead3094229e5 *./tests/data/msvideo1.vsynth2.out.yuv
+stddev:    7.42 PSNR: 30.72 MAXDIFF:  123 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth2/qtrle b/tests/ref/vsynth2/qtrle
index 6b2a01168e..d87a6b96ac 100644
--- a/tests/ref/vsynth2/qtrle
+++ b/tests/ref/vsynth2/qtrle
@@ -1,4 +1,4 @@
 d8c1604dc46d9aa4ec0385e6722c6989 *./tests/data/vsynth2/qtrle.mov
 14798419 ./tests/data/vsynth2/qtrle.mov
-b2418e0e3a9a8619b31219cbcf24dc82 *./tests/data/qtrle.vsynth2.out.yuv
-stddev:    1.26 PSNR: 46.06 MAXDIFF:   13 bytes:  7603200/  7603200
+98d0e2854731472c5bf13d8638502d0a *./tests/data/qtrle.vsynth2.out.yuv
+stddev:    1.26 PSNR: 46.10 MAXDIFF:   13 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth2/rgb b/tests/ref/vsynth2/rgb
index ea83470814..bcdef36b72 100644
--- a/tests/ref/vsynth2/rgb
+++ b/tests/ref/vsynth2/rgb
@@ -1,4 +1,4 @@
 f2e9c419023c743bf99aa5b2e55ad233 *./tests/data/vsynth2/rgb.avi
 15213260 ./tests/data/vsynth2/rgb.avi
-b2418e0e3a9a8619b31219cbcf24dc82 *./tests/data/rgb.vsynth2.out.yuv
-stddev:    1.26 PSNR: 46.06 MAXDIFF:   13 bytes:  7603200/  7603200
+98d0e2854731472c5bf13d8638502d0a *./tests/data/rgb.vsynth2.out.yuv
+stddev:    1.26 PSNR: 46.10 MAXDIFF:   13 bytes:  7603200/  7603200

From 094aa84b03632370dde171935171321bbfa9548b Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Thu, 26 May 2011 20:19:04 +0200
Subject: [PATCH 341/830] muxers: Add a flag to mark muxers that allow (non
 strict) monotone timestamps.

---
 libavformat/avformat.h    | 3 +++
 libavformat/matroskaenc.c | 2 +-
 libavformat/utils.c       | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 1607a3092b..00fe8a6a45 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -262,6 +262,9 @@ typedef struct AVFormatParameters {
 #define AVFMT_NOSTREAMS     0x1000 /**< Format does not require any streams */
 #define AVFMT_NOBINSEARCH   0x2000 /**< Format does not allow to fallback to binary search via read_timestamp */
 #define AVFMT_NOGENSEARCH   0x4000 /**< Format does not allow to fallback to generic search */
+#define AVFMT_TS_NONSTRICT  0x8000 /**< Format does not require strictly
+                                          increasing timestamps, but they must
+                                          still be monotonic */
 
 typedef struct AVOutputFormat {
     const char *name;
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index c3e203cb36..ae29765a4a 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -1220,7 +1220,7 @@ AVOutputFormat ff_webm_muxer = {
     mkv_write_header,
     mkv_write_packet,
     mkv_write_trailer,
-    .flags = AVFMT_GLOBALHEADER | AVFMT_VARIABLE_FPS,
+    .flags = AVFMT_GLOBALHEADER | AVFMT_VARIABLE_FPS | AVFMT_TS_NONSTRICT,
 };
 #endif
 
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 3c9a89dc64..3432b2269f 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2993,7 +2993,7 @@ static int compute_pkt_fields2(AVFormatContext *s, AVStream *st, AVPacket *pkt){
         pkt->dts= st->pts_buffer[0];
     }
 
-    if(st->cur_dts && st->cur_dts != AV_NOPTS_VALUE && st->cur_dts >= pkt->dts){
+    if(st->cur_dts && st->cur_dts != AV_NOPTS_VALUE && ((!(s->oformat->flags & AVFMT_TS_NONSTRICT) && st->cur_dts >= pkt->dts) || st->cur_dts > pkt->dts)){
         av_log(s, AV_LOG_ERROR,
                "Application provided invalid, non monotonically increasing dts to muxer in stream %d: %"PRId64" >= %"PRId64"\n",
                st->index, st->cur_dts, pkt->dts);

From a138121bf1f952073914d493c099ebfea22ac5f7 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Thu, 26 May 2011 20:20:36 +0200
Subject: [PATCH 342/830] webm: Additional options/presets for VP8 encodes
 under FFmpeg

---
 libavcodec/libvpxenc.c | 53 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index ef85b3074a..b55e7559b4 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -30,6 +30,7 @@
 
 #include "avcodec.h"
 #include "libavutil/base64.h"
+#include "libavutil/opt.h"
 
 /**
  * Portion of struct vpx_codec_cx_pkt from vpx_encoder.h.
@@ -50,10 +51,45 @@ typedef struct VP8EncoderContext {
     struct vpx_codec_ctx encoder;
     struct vpx_image rawimg;
     struct vpx_fixed_buf twopass_stats;
-    unsigned long deadline; //i.e., RT/GOOD/BEST
+    int deadline; //i.e., RT/GOOD/BEST
     struct FrameListData *coded_frame_list;
+
+    int cpuused;
+
+    /**
+     * VP8 specific flags, see VP8F_* below.
+     */
+    int flags;
+#define VP8F_ERROR_RESILIENT 0x00000001 ///< Enable measures appropriate for streaming over lossy links
+#define VP8F_AUTO_ALT_REF    0x00000002 ///< Enable automatic alternate reference frame generation
+
+    int arnr_max_frames;
+    int arnr_strength;
+    int arnr_type;
 } VP8Context;
 
+#define V AV_OPT_FLAG_VIDEO_PARAM
+#define E AV_OPT_FLAG_ENCODING_PARAM
+
+static const AVOption options[]={
+{"speed", "", offsetof(VP8Context, cpuused), FF_OPT_TYPE_INT, 3, -16, 16, V|E},
+{"quality", "", offsetof(VP8Context, deadline), FF_OPT_TYPE_INT, VPX_DL_GOOD_QUALITY, INT_MIN, INT_MAX, V|E, "quality"},
+{"best", NULL, 0, FF_OPT_TYPE_CONST, VPX_DL_BEST_QUALITY, INT_MIN, INT_MAX, V|E, "quality"},
+{"good", NULL, 0, FF_OPT_TYPE_CONST, VPX_DL_GOOD_QUALITY, INT_MIN, INT_MAX, V|E, "quality"},
+{"realtime", NULL, 0, FF_OPT_TYPE_CONST, VPX_DL_REALTIME, INT_MIN, INT_MAX, V|E, "quality"},
+{"vp8flags", "", offsetof(VP8Context, flags), FF_OPT_TYPE_FLAGS, 0, 0, UINT_MAX, V|E, "flags"},
+{"error_resilient", "enable error resilience", 0, FF_OPT_TYPE_CONST, VP8F_ERROR_RESILIENT, INT_MIN, INT_MAX, V|E, "flags"},
+{"altref", "enable use of alternate reference frames (VP8/2-pass only)", 0, FF_OPT_TYPE_CONST, VP8F_AUTO_ALT_REF, INT_MIN, INT_MAX, V|E, "flags"},
+{"arnr_max_frames", "altref noise reduction max frame count", offsetof(VP8Context, arnr_max_frames), FF_OPT_TYPE_INT, 0, 0, 15, V|E},
+{"arnr_strength", "altref noise reduction filter strength", offsetof(VP8Context, arnr_strength), FF_OPT_TYPE_INT, 3, 0, 6, V|E},
+{"arnr_type", "altref noise reduction filter type", offsetof(VP8Context, arnr_type), FF_OPT_TYPE_INT, 3, 1, 3, V|E},
+{NULL}
+};
+static const AVClass class = { "libvpx", av_default_item_name, options, LIBAVUTIL_VERSION_INT };
+
+#undef V
+#undef E
+
 /** String mappings for enum vp8e_enc_control_id */
 static const char *ctlidstr[] = {
     [VP8E_UPD_ENTROPY]           = "VP8E_UPD_ENTROPY",
@@ -205,7 +241,6 @@ static av_cold int vp8_init(AVCodecContext *avctx)
 {
     VP8Context *ctx = avctx->priv_data;
     const struct vpx_codec_iface *iface = &vpx_codec_vp8_cx_algo;
-    int cpuused = 3;
     struct vpx_codec_enc_cfg enccfg;
     int res;
 
@@ -224,6 +259,7 @@ static av_cold int vp8_init(AVCodecContext *avctx)
     enccfg.g_timebase.num = avctx->time_base.num;
     enccfg.g_timebase.den = avctx->time_base.den;
     enccfg.g_threads      = avctx->thread_count;
+    enccfg.g_lag_in_frames= FFMIN(avctx->rc_lookahead, 25);  //0-25, avoids init failure
 
     if (avctx->flags & CODEC_FLAG_PASS1)
         enccfg.g_pass = VPX_RC_FIRST_PASS;
@@ -259,6 +295,7 @@ static av_cold int vp8_init(AVCodecContext *avctx)
         enccfg.rc_buf_initial_sz =
             avctx->rc_initial_buffer_occupancy * 1000LL / avctx->bit_rate;
     enccfg.rc_buf_optimal_sz     = enccfg.rc_buf_sz * 5 / 6;
+    enccfg.rc_undershoot_pct     = round(avctx->rc_buffer_aggressivity * 100);
 
     //_enc_init() will balk if kf_min_dist differs from max w/VPX_KF_AUTO
     if (avctx->keyint_min == avctx->gop_size)
@@ -294,13 +331,14 @@ static av_cold int vp8_init(AVCodecContext *avctx)
         enccfg.rc_twopass_stats_in = ctx->twopass_stats;
     }
 
-    ctx->deadline = VPX_DL_GOOD_QUALITY;
     /* 0-3: For non-zero values the encoder increasingly optimizes for reduced
        complexity playback on low powered devices at the expense of encode
        quality. */
    if (avctx->profile != FF_PROFILE_UNKNOWN)
        enccfg.g_profile = avctx->profile;
 
+    enccfg.g_error_resilient = ctx->flags & VP8F_ERROR_RESILIENT;
+
     dump_enc_cfg(avctx, &enccfg);
     /* Construct Encoder Context */
     res = vpx_codec_enc_init(&ctx->encoder, iface, &enccfg, 0);
@@ -311,11 +349,17 @@ static av_cold int vp8_init(AVCodecContext *avctx)
 
     //codec control failures are currently treated only as warnings
     av_log(avctx, AV_LOG_DEBUG, "vpx_codec_control\n");
-    codecctl_int(avctx, VP8E_SET_CPUUSED,           cpuused);
+    codecctl_int(avctx, VP8E_SET_CPUUSED,           ctx->cpuused);
     codecctl_int(avctx, VP8E_SET_NOISE_SENSITIVITY, avctx->noise_reduction);
     codecctl_int(avctx, VP8E_SET_TOKEN_PARTITIONS,  av_log2(avctx->slices));
     codecctl_int(avctx, VP8E_SET_STATIC_THRESHOLD,  avctx->mb_threshold);
     codecctl_int(avctx, VP8E_SET_CQ_LEVEL,          (int)avctx->crf);
+    codecctl_int(avctx, VP8E_SET_ENABLEAUTOALTREF,  !!(ctx->flags & VP8F_AUTO_ALT_REF));
+    codecctl_int(avctx, VP8E_SET_ARNR_MAXFRAMES,    ctx->arnr_max_frames);
+    codecctl_int(avctx, VP8E_SET_ARNR_STRENGTH,     ctx->arnr_strength);
+    codecctl_int(avctx, VP8E_SET_ARNR_TYPE,         ctx->arnr_type);
+
+    av_log(avctx, AV_LOG_DEBUG, "Using deadline: %d\n", ctx->deadline);
 
     //provide dummy value to initialize wrapper, values will be updated each _encode()
     vpx_img_wrap(&ctx->rawimg, VPX_IMG_FMT_I420, avctx->width, avctx->height, 1,
@@ -511,4 +555,5 @@ AVCodec ff_libvpx_encoder = {
     CODEC_CAP_DELAY,
     .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
     .long_name = NULL_IF_CONFIG_SMALL("libvpx VP8"),
+    .priv_class= &class,
 };

From 40a5dd2f35e0cfcfb92475a8f305fb6f78038507 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Wed, 25 May 2011 17:03:12 -0700
Subject: [PATCH 343/830] id3v2: Initialize tflags for version 2.2.

---
 libavformat/id3v2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index 4fecffe6ba..948261ad97 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -220,7 +220,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
         avio_skip(s->pb, get_size(s->pb, 4));
 
     while (len >= taghdrlen) {
-        unsigned int tflags;
+        unsigned int tflags = 0;
         int tunsync = 0;
 
         if (isv34) {

From 86f868771bac89168086285b71186fd8cf934cc3 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Wed, 25 May 2011 17:57:33 -0700
Subject: [PATCH 344/830] id3v2: Check malloc result. ID3v2 tags can be very
 large.

---
 libavformat/id3v2.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index 948261ad97..06ae6f8b90 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -237,7 +237,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
             tag[3] = 0;
             tlen = avio_rb24(s->pb);
         }
-        if (tlen < 0 || tlen > len - taghdrlen) {
+        if (tlen <= 0 || tlen > len - taghdrlen) {
             av_log(s, AV_LOG_WARNING, "Invalid size in frame %s, skipping the rest of tag.\n", tag);
             break;
         }
@@ -256,6 +256,10 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
             if (unsync || tunsync) {
                 int i, j;
                 av_fast_malloc(&buffer, &buffer_size, tlen);
+                if (!buffer) {
+                    av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen);
+                    goto seek;
+                }
                 for (i = 0, j = 0; i < tlen; i++, j++) {
                     buffer[j] = avio_r8(s->pb);
                     if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) {
@@ -276,6 +280,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
             break;
         }
         /* Skip to end of tag */
+seek:
         avio_seek(s->pb, next, SEEK_SET);
     }
 

From 701012d676042608cd6ec3317c1936a246f436d7 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Thu, 26 May 2011 23:49:17 +0200
Subject: [PATCH 345/830] Fix 32bit rawvideo in avi on big-endian.

---
 libavcodec/rawdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index 9702f98918..f8e119b017 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -56,7 +56,7 @@ static const PixelFormatTag pix_fmt_bps_avi[] = {
     { PIX_FMT_RGB555, 15 },
     { PIX_FMT_RGB555, 16 },
     { PIX_FMT_BGR24,  24 },
-    { PIX_FMT_RGB32,  32 },
+    { PIX_FMT_BGRA,   32 },
     { PIX_FMT_NONE, 0 },
 };
 

From 2660e9e1f398dbc8d447164a772b52281fa94a30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <tomas.hardin@codemill.se>
Date: Thu, 26 May 2011 09:29:03 +0200
Subject: [PATCH 346/830] Add metadata conversion table to the wav demuxer

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/wav.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index baa71561d2..61261e6807 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -26,6 +26,7 @@
 #include "avio_internal.h"
 #include "pcm.h"
 #include "riff.h"
+#include "metadata.h"
 
 typedef struct {
     int64_t data;
@@ -285,6 +286,14 @@ static int wav_parse_bext_tag(AVFormatContext *s, int64_t size)
     return 0;
 }
 
+static const AVMetadataConv wav_metadata_conv[] = {
+    {"description",      "comment"      },
+    {"originator",       "encoded_by"   },
+    {"origination_date", "date"         },
+    {"origination_time", "creation_time"},
+    {0},
+};
+
 /* wav input */
 static int wav_read_header(AVFormatContext *s,
                            AVFormatParameters *ap)
@@ -391,6 +400,9 @@ break_loop:
         sample_count = (data_size<<3) / (st->codec->channels * (uint64_t)av_get_bits_per_sample(st->codec->codec_id));
     if (sample_count)
         st->duration = sample_count;
+
+    ff_metadata_conv_ctx(s, NULL, wav_metadata_conv);
+
     return 0;
 }
 

From 5918d16742277f417f28ae59417b871a2ed8b39c Mon Sep 17 00:00:00 2001
From: Etienne Buira <etienne.buira.lists@free.fr>
Date: Thu, 26 May 2011 19:48:59 +0200
Subject: [PATCH 347/830] Minor cleanup in libx264.c

The early avctx->has_b_frames assignment is not needed: it is overwritten
later on (look for "// update AVCodecContext with x264 parameters") and
is not accessed in between.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/libx264.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 28683a930a..7a58f2b6e0 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -228,7 +228,6 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.i_bframe_adaptive = avctx->b_frame_strategy;
     x4->params.i_bframe_bias     = avctx->bframebias;
     x4->params.i_bframe_pyramid  = avctx->flags2 & CODEC_FLAG2_BPYRAMID ? X264_B_PYRAMID_NORMAL : X264_B_PYRAMID_NONE;
-    avctx->has_b_frames          = avctx->flags2 & CODEC_FLAG2_BPYRAMID ? 2 : !!avctx->max_b_frames;
 
     x4->params.i_keyint_min = avctx->keyint_min;
     if (x4->params.i_keyint_min > x4->params.i_keyint_max)

From 152d3519445e59567fea53e18332768072fd6348 Mon Sep 17 00:00:00 2001
From: Etienne Buira <etienne.buira.lists@free.fr>
Date: Thu, 26 May 2011 19:52:16 +0200
Subject: [PATCH 348/830] Remove specific note when not specific

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/options.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/options.c b/libavcodec/options.c
index ccf1b87c96..78a7bc8a40 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -353,8 +353,8 @@ static const AVOption options[]={
 {"brd_scale", "downscales frames for dynamic B-frame decision", OFFSET(brd_scale), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, 10, V|E},
 {"crf", "enables constant quality mode, and selects the quality (x264/VP8)", OFFSET(crf), FF_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, 0, 63, V|E},
 {"cqp", "constant quantization parameter rate control method", OFFSET(cqp), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, V|E},
-{"keyint_min", "minimum interval between IDR-frames (x264)", OFFSET(keyint_min), FF_OPT_TYPE_INT, {.dbl = 25 }, INT_MIN, INT_MAX, V|E},
-{"refs", "reference frames to consider for motion compensation (Snow)", OFFSET(refs), FF_OPT_TYPE_INT, {.dbl = 1 }, INT_MIN, INT_MAX, V|E},
+{"keyint_min", "minimum interval between IDR-frames", OFFSET(keyint_min), FF_OPT_TYPE_INT, {.dbl = 25 }, INT_MIN, INT_MAX, V|E},
+{"refs", "reference frames to consider for motion compensation", OFFSET(refs), FF_OPT_TYPE_INT, {.dbl = 1 }, INT_MIN, INT_MAX, V|E},
 {"chromaoffset", "chroma qp offset from luma", OFFSET(chromaoffset), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"bframebias", "influences how often B-frames are used", OFFSET(bframebias), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"trellis", "rate-distortion optimal quantization", OFFSET(trellis), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|A|E},

From 28768579aafd31acc74b6534dbc3ed91548afe83 Mon Sep 17 00:00:00 2001
From: Etienne Buira <etienne.buira.lists@free.fr>
Date: Thu, 26 May 2011 21:00:37 +0200
Subject: [PATCH 349/830] Fix typo

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/libx264.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 7a58f2b6e0..a45789db62 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -200,7 +200,7 @@ static void check_default_settings(AVCodecContext *avctx)
     if (score >= 5) {
         av_log(avctx, AV_LOG_ERROR, "Default settings detected, using medium profile\n");
         x4->preset = av_strdup("medium");
-        if (avctx->bit_rate == 200*100)
+        if (avctx->bit_rate == 200*1000)
             avctx->crf = 23;
     }
 }

From 5e0dafe8caa9ba1e87dfc51aa25ebb9fea61ed42 Mon Sep 17 00:00:00 2001
From: Etienne Buira <etienne.buira.lists@free.fr>
Date: Thu, 26 May 2011 21:02:50 +0200
Subject: [PATCH 350/830] Fix memleak

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/libx264.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index a45789db62..b0cca65a7c 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -176,6 +176,7 @@ static av_cold int X264_close(AVCodecContext *avctx)
     av_free(x4->level);
     av_free(x4->stats);
     av_free(x4->weightp);
+    av_free(x4->x264opts);
 
     return 0;
 }

From 3577416212d992f63352f3695c6eb1b63567503d Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 20:14:53 +0200
Subject: [PATCH 351/830] bktr: factorize returning error codes.

This will be useful in the following commit.
---
 libavdevice/bktr.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index 821567199e..810bdd67b9 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -249,9 +249,12 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     int width, height;
     int frame_rate;
     int frame_rate_base;
+    int ret = 0;
 
-    if (ap->width <= 0 || ap->height <= 0 || ap->time_base.den <= 0)
-        return -1;
+    if (ap->width <= 0 || ap->height <= 0 || ap->time_base.den <= 0) {
+        ret = AVERROR(EINVAL);
+        goto out;
+    }
 
     width = ap->width;
     height = ap->height;
@@ -259,8 +262,10 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     frame_rate_base = ap->time_base.num;
 
     st = av_new_stream(s1, 0);
-    if (!st)
-        return AVERROR(ENOMEM);
+    if (!st) {
+        ret = AVERROR(ENOMEM);
+        goto out;
+    }
     av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in use */
 
     s->width = width;
@@ -289,13 +294,16 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 #endif
 
     if (bktr_init(s1->filename, width, height, s->standard,
-            &(s->video_fd), &(s->tuner_fd), -1, 0.0) < 0)
-        return AVERROR(EIO);
+            &(s->video_fd), &(s->tuner_fd), -1, 0.0) < 0) {
+        ret = AVERROR(EIO);
+        goto out;
+    }
 
     nsignals = 0;
     last_frame_time = 0;
 
-    return 0;
+out:
+    return ret;
 }
 
 static int grab_read_close(AVFormatContext *s1)

From 33e036967253b83621f378a75d3e4ed199bf4508 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 07:43:01 +0200
Subject: [PATCH 352/830] bktr: add video_size private option.

---
 libavdevice/bktr.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index 810bdd67b9..e8ff557b73 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -27,6 +27,7 @@
 #include "libavformat/avformat.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
 #if HAVE_DEV_BKTR_IOCTL_METEOR_H && HAVE_DEV_BKTR_IOCTL_BT848_H
 # include <dev/bktr/ioctl_meteor.h>
 # include <dev/bktr/ioctl_bt848.h>
@@ -57,6 +58,7 @@ typedef struct {
     int frame_rate_base;
     uint64_t per_frame;
     int standard;
+    char *video_size; /**< String describing video size, set by a private option. */
 } VideoData;
 
 
@@ -251,13 +253,21 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     int frame_rate_base;
     int ret = 0;
 
-    if (ap->width <= 0 || ap->height <= 0 || ap->time_base.den <= 0) {
+    if (ap->time_base.den <= 0) {
         ret = AVERROR(EINVAL);
         goto out;
     }
 
-    width = ap->width;
-    height = ap->height;
+    if ((ret = av_parse_video_size(&width, &height, s->video_size)) < 0) {
+        av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
+        goto out;
+    }
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->width > 0)
+        width = ap->width;
+    if (ap->height > 0)
+        height = ap->height;
+#endif
     frame_rate = ap->time_base.den;
     frame_rate_base = ap->time_base.num;
 
@@ -303,6 +313,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     last_frame_time = 0;
 
 out:
+    av_freep(&s->video_size);
     return ret;
 }
 
@@ -324,6 +335,8 @@ static int grab_read_close(AVFormatContext *s1)
     return 0;
 }
 
+#define OFFSET(x) offsetof(VideoData, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "standard", "", offsetof(VideoData, standard), FF_OPT_TYPE_INT, {.dbl = VIDEO_FORMAT}, PAL, NTSCJ, AV_OPT_FLAG_DECODING_PARAM, "standard" },
     { "PAL",      "", 0, FF_OPT_TYPE_CONST, {.dbl = PAL},   0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
@@ -332,6 +345,7 @@ static const AVOption options[] = {
     { "PALN",     "", 0, FF_OPT_TYPE_CONST, {.dbl = PALN},  0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
     { "PALM",     "", 0, FF_OPT_TYPE_CONST, {.dbl = PALM},  0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
     { "NTSCJ",    "", 0, FF_OPT_TYPE_CONST, {.dbl = NTSCJ}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
+    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "vga"}, 0, 0, DEC },
     { NULL },
 };
 

From 284bac2e7770685831a8389e8f2eaae977d4daa4 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 17:13:09 +0200
Subject: [PATCH 353/830] libdc1394: return meaningful error codes.

---
 libavdevice/libdc1394.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index 336c465311..50fc033d42 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -116,6 +116,7 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     int width                = !ap->width ? 320 : ap->width;
     int height               = !ap->height ? 240 : ap->height;
     int frame_rate           = !ap->time_base.num ? 30000 : av_rescale(1000, ap->time_base.den, ap->time_base.num);
+    int ret = 0;
 
     for (fmt = dc1394_frame_formats; fmt->width; fmt++)
          if (fmt->pix_fmt == pix_fmt && fmt->width == width && fmt->height == height)
@@ -128,13 +129,16 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     if (!fps->frame_rate || !fmt->width) {
         av_log(c, AV_LOG_ERROR, "Can't find matching camera format for %s, %dx%d@%d:1000fps\n", avcodec_get_pix_fmt_name(pix_fmt),
                                                                                                 width, height, frame_rate);
+        ret = AVERROR(EINVAL);
         goto out;
     }
 
     /* create a video stream */
     vst = av_new_stream(c, 0);
-    if (!vst)
+    if (!vst) {
+        ret = AVERROR(ENOMEM);
         goto out;
+    }
     av_set_pts_info(vst, 64, 1, 1000);
     vst->codec->codec_type = AVMEDIA_TYPE_VIDEO;
     vst->codec->codec_id = CODEC_ID_RAWVIDEO;
@@ -156,9 +160,8 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     vst->codec->bit_rate = av_rescale(dc1394->packet.size * 8, fps->frame_rate, 1000);
     *select_fps = fps;
     *select_fmt = fmt;
-    return 0;
 out:
-    return -1;
+    return ret;
 }
 
 #if HAVE_LIBDC1394_1

From fd48620e3e259cdb8df9e7d677a943d6e7d3575b Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 07:43:01 +0200
Subject: [PATCH 354/830] libdc1394: add video_size private option.

---
 libavdevice/libdc1394.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index 50fc033d42..4462262c93 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -24,6 +24,7 @@
 #include "libavformat/avformat.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
 
 #if HAVE_LIBDC1394_2
 #include <dc1394/dc1394.h>
@@ -59,6 +60,7 @@ typedef struct dc1394_data {
 #endif
     int current_frame;
     int fps;
+    char *video_size;       /**< String describing video size, set by a private option. */
 
     AVPacket packet;
 } dc1394_data;
@@ -90,10 +92,13 @@ struct dc1394_frame_rate {
     { 0, 0 } /* gotta be the last one */
 };
 
+#define OFFSET(x) offsetof(dc1394_data, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
 #if HAVE_LIBDC1394_1
     { "channel", "", offsetof(dc1394_data, channel), FF_OPT_TYPE_INT, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
 #endif
+    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "qvga"}, 0, 0, DEC },
     { NULL },
 };
 
@@ -113,11 +118,21 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     struct dc1394_frame_format *fmt;
     struct dc1394_frame_rate *fps;
     enum PixelFormat pix_fmt = ap->pix_fmt == PIX_FMT_NONE ? PIX_FMT_UYVY422 : ap->pix_fmt; /* defaults */
-    int width                = !ap->width ? 320 : ap->width;
-    int height               = !ap->height ? 240 : ap->height;
+    int width, height;
     int frame_rate           = !ap->time_base.num ? 30000 : av_rescale(1000, ap->time_base.den, ap->time_base.num);
     int ret = 0;
 
+    if ((ret = av_parse_video_size(&width, &height, dc1394->video_size)) < 0) {
+        av_log(c, AV_LOG_ERROR, "Couldn't parse video size.\n");
+        goto out;
+    }
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->width > 0)
+        width = ap->width;
+    if (ap->height > 0)
+        height = ap->height;
+#endif
+
     for (fmt = dc1394_frame_formats; fmt->width; fmt++)
          if (fmt->pix_fmt == pix_fmt && fmt->width == width && fmt->height == height)
              break;
@@ -161,6 +176,7 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     *select_fps = fps;
     *select_fmt = fmt;
 out:
+    av_freep(&dc1394->video_size);
     return ret;
 }
 

From 82b5aa0add203e6e90e3bbefbb9eb02118256ff6 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 20:37:08 +0200
Subject: [PATCH 355/830] v4l2: factorize returning error codes.

This will be useful in the following commit.
---
 libavdevice/v4l2.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 566ee92801..30eb972bf5 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -577,13 +577,14 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 {
     struct video_data *s = s1->priv_data;
     AVStream *st;
-    int res;
+    int res = 0;
     uint32_t desired_format, capabilities;
     enum CodecID codec_id;
 
     st = av_new_stream(s1, 0);
     if (!st) {
-        return AVERROR(ENOMEM);
+        res = AVERROR(ENOMEM);
+        goto out;
     }
     av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in us */
 
@@ -593,7 +594,8 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     capabilities = 0;
     s->fd = device_open(s1, &capabilities);
     if (s->fd < 0) {
-        return AVERROR(EIO);
+        res = AVERROR(EIO);
+        goto out;
     }
     av_log(s1, AV_LOG_VERBOSE, "[%d]Capabilities: %x\n", s->fd, capabilities);
 
@@ -604,7 +606,8 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
         if (ioctl(s->fd, VIDIOC_G_FMT, &fmt) < 0) {
             av_log(s1, AV_LOG_ERROR, "ioctl(VIDIOC_G_FMT): %s\n", strerror(errno));
-            return AVERROR(errno);
+            res = AVERROR(errno);
+            goto out;
         }
         s->width  = fmt.fmt.pix.width;
         s->height = fmt.fmt.pix.height;
@@ -617,14 +620,15 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
                "codec_id %d, pix_fmt %d.\n", s1->video_codec_id, ap->pix_fmt);
         close(s->fd);
 
-        return AVERROR(EIO);
+        res = AVERROR(EIO);
+        goto out;
     }
-    if (av_image_check_size(s->width, s->height, 0, s1) < 0)
-        return AVERROR(EINVAL);
+    if ((res = av_image_check_size(s->width, s->height, 0, s1)) < 0)
+        goto out;
     s->frame_format = desired_format;
 
-    if (v4l2_set_parameters(s1, ap) < 0)
-        return AVERROR(EIO);
+    if ((res = v4l2_set_parameters(s1, ap)) < 0)
+        goto out;
 
     st->codec->pix_fmt = fmt_v4l2ff(desired_format, codec_id);
     s->frame_size = avpicture_get_size(st->codec->pix_fmt, s->width, s->height);
@@ -641,7 +645,8 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     if (res < 0) {
         close(s->fd);
 
-        return AVERROR(EIO);
+        res = AVERROR(EIO);
+        goto out;
     }
     s->top_field_first = first_field(s->fd);
 
@@ -653,7 +658,8 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     st->codec->time_base.num = ap->time_base.num;
     st->codec->bit_rate = s->frame_size * 1/av_q2d(st->codec->time_base) * 8;
 
-    return 0;
+out:
+    return res;
 }
 
 static int v4l2_read_packet(AVFormatContext *s1, AVPacket *pkt)

From 8fe7b6443fe8721215f1abac3b854cd52092c909 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 07:43:01 +0200
Subject: [PATCH 356/830] v4l2: add video_size private option.

---
 libavdevice/v4l2.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 30eb972bf5..f5baf08b42 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -46,6 +46,7 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
 
 static const int desired_video_buffers = 256;
 
@@ -69,6 +70,7 @@ struct video_data {
     unsigned int *buf_len;
     char *standard;
     int channel;
+    char *video_size; /**< String describing video size, set by a private option. */
 };
 
 struct buff_data {
@@ -588,8 +590,16 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     }
     av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in us */
 
-    s->width  = ap->width;
-    s->height = ap->height;
+    if (s->video_size && (res = av_parse_video_size(&s->width, &s->height, s->video_size)) < 0) {
+        av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
+        goto out;
+    }
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->width > 0)
+        s->width  = ap->width;
+    if (ap->height > 0)
+        s->height = ap->height;
+#endif
 
     capabilities = 0;
     s->fd = device_open(s1, &capabilities);
@@ -659,6 +669,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     st->codec->bit_rate = s->frame_size * 1/av_q2d(st->codec->time_base) * 8;
 
 out:
+    av_freep(&s->video_size);
     return res;
 }
 
@@ -702,9 +713,12 @@ static int v4l2_read_close(AVFormatContext *s1)
     return 0;
 }
 
+#define OFFSET(x) offsetof(struct video_data, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "standard", "", offsetof(struct video_data, standard), FF_OPT_TYPE_STRING, {.str = "NTSC" }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
     { "channel",  "", offsetof(struct video_data, channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { NULL },
 };
 

From 3e15ea21504a5cc1765afd78dd72ef486a10a27b Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 07:43:01 +0200
Subject: [PATCH 357/830] vfwcap: add video_size private option.

---
 libavdevice/vfwcap.c | 46 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index 8eecf5bdff..d307e11184 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c
@@ -20,6 +20,9 @@
  */
 
 #include "libavformat/avformat.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
 #include <windows.h>
 #include <vfw.h>
 
@@ -34,12 +37,14 @@
 /* End of missing MinGW defines */
 
 struct vfw_ctx {
+    const AVClass *class;
     HWND hwnd;
     HANDLE mutex;
     HANDLE event;
     AVPacketList *pktl;
     unsigned int curbufsize;
     unsigned int frame_num;
+    char *video_size;       /**< A string describing video size, set by a private option. */
 };
 
 static enum PixelFormat vfw_pixfmt(DWORD biCompression, WORD biBitCount)
@@ -230,6 +235,8 @@ static int vfw_read_close(AVFormatContext *s)
         pktl = next;
     }
 
+    av_freep(&ctx->video_size);
+
     return 0;
 }
 
@@ -244,8 +251,6 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
     CAPTUREPARMS cparms;
     DWORD biCompression;
     WORD biBitCount;
-    int width;
-    int height;
     int ret;
 
     if (!strcmp(s->filename, "list")) {
@@ -318,10 +323,20 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
 
     dump_bih(s, &bi->bmiHeader);
 
-    width  = ap->width  ? ap->width  : bi->bmiHeader.biWidth ;
-    height = ap->height ? ap->height : bi->bmiHeader.biHeight;
-    bi->bmiHeader.biWidth  = width ;
-    bi->bmiHeader.biHeight = height;
+
+    if (ctx->video_size) {
+        ret = av_parse_video_size(&bi->bmiHeader.biWidth, &bi->bmiHeader.biHeight, ctx->video_size);
+        if (ret < 0) {
+            av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n");
+            goto fail_bi;
+        }
+    }
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->width > 0)
+        bi->bmiHeader.biWidth = ap->width;
+    if (ap->height > 0)
+        bi->bmiHeader.biHeight = ap->height;
+#endif
 
     if (0) {
         /* For testing yet unsupported compressions
@@ -370,8 +385,8 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
     codec = st->codec;
     codec->time_base = ap->time_base;
     codec->codec_type = AVMEDIA_TYPE_VIDEO;
-    codec->width = width;
-    codec->height = height;
+    codec->width  = bi->bmiHeader.biWidth;
+    codec->height = bi->bmiHeader.biHeight;
     codec->pix_fmt = vfw_pixfmt(biCompression, biBitCount);
     if(codec->pix_fmt == PIX_FMT_NONE) {
         codec->codec_id = vfw_codecid(biCompression);
@@ -452,6 +467,20 @@ static int vfw_read_packet(AVFormatContext *s, AVPacket *pkt)
     return pkt->size;
 }
 
+#define OFFSET(x) offsetof(struct vfw_ctx, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { NULL },
+};
+
+static const AVClass vfw_class = {
+    .class_name = "VFW indev",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_vfwcap_demuxer = {
     "vfwcap",
     NULL_IF_CONFIG_SMALL("VFW video capture"),
@@ -461,4 +490,5 @@ AVInputFormat ff_vfwcap_demuxer = {
     vfw_read_packet,
     vfw_read_close,
     .flags = AVFMT_NOFILE,
+    .priv_class = &vfw_class,
 };

From 3102fb0351fb9dd96543b3f1b9c4c04758226ee5 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 21:17:05 +0200
Subject: [PATCH 358/830] x11grab: factorize returning error codes.

---
 libavdevice/x11grab.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c
index 0e63d09fea..2e1f019584 100644
--- a/libavdevice/x11grab.c
+++ b/libavdevice/x11grab.c
@@ -91,6 +91,7 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     int y_off = 0;
     int use_shm;
     char *param, *offset;
+    int ret = 0;
 
     param = av_strdup(s1->filename);
     offset = strchr(param, '+');
@@ -105,17 +106,20 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     dpy = XOpenDisplay(param);
     if(!dpy) {
         av_log(s1, AV_LOG_ERROR, "Could not open X display.\n");
-        return AVERROR(EIO);
+        ret = AVERROR(EIO);
+        goto out;
     }
 
     if (ap->width <= 0 || ap->height <= 0 || ap->time_base.den <= 0) {
         av_log(s1, AV_LOG_ERROR, "AVParameters don't have video size and/or rate. Use -s and -r.\n");
-        return AVERROR(EIO);
+        ret = AVERROR(EINVAL);
+        goto out;
     }
 
     st = av_new_stream(s1, 0);
     if (!st) {
-        return AVERROR(ENOMEM);
+        ret = AVERROR(ENOMEM);
+        goto out;
     }
     av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in us */
 
@@ -136,7 +140,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
                                         IPC_CREAT|0777);
         if (x11grab->shminfo.shmid == -1) {
             av_log(s1, AV_LOG_ERROR, "Fatal: Can't get shared memory!\n");
-            return AVERROR(ENOMEM);
+            ret = AVERROR(ENOMEM);
+            goto out;
         }
         x11grab->shminfo.shmaddr = image->data = shmat(x11grab->shminfo.shmid, 0, 0);
         x11grab->shminfo.readOnly = False;
@@ -144,7 +149,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         if (!XShmAttach(dpy, &x11grab->shminfo)) {
             av_log(s1, AV_LOG_ERROR, "Fatal: Failed to attach shared memory!\n");
             /* needs some better error subroutine :) */
-            return AVERROR(EIO);
+            ret = AVERROR(EIO);
+            goto out;
         }
     } else {
         image = XGetImage(dpy, RootWindow(dpy, DefaultScreen(dpy)),
@@ -172,7 +178,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         } else {
             av_log(s1, AV_LOG_ERROR, "RGB ordering at image depth %i not supported ... aborting\n", image->bits_per_pixel);
             av_log(s1, AV_LOG_ERROR, "color masks: r 0x%.6lx g 0x%.6lx b 0x%.6lx\n", image->red_mask, image->green_mask, image->blue_mask);
-            return AVERROR(EIO);
+            ret = AVERROR(EIO);
+            goto out;
         }
         break;
     case 24:
@@ -187,7 +194,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         } else {
             av_log(s1, AV_LOG_ERROR,"rgb ordering at image depth %i not supported ... aborting\n", image->bits_per_pixel);
             av_log(s1, AV_LOG_ERROR, "color masks: r 0x%.6lx g 0x%.6lx b 0x%.6lx\n", image->red_mask, image->green_mask, image->blue_mask);
-            return AVERROR(EIO);
+            ret = AVERROR(EIO);
+            goto out;
         }
         break;
     case 32:
@@ -210,7 +218,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         break;
     default:
         av_log(s1, AV_LOG_ERROR, "image depth %i not supported ... aborting\n", image->bits_per_pixel);
-        return -1;
+        ret = AVERROR(EINVAL);
+        goto out;
     }
 
     x11grab->frame_size = ap->width * ap->height * image->bits_per_pixel/8;
@@ -232,7 +241,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     st->codec->time_base = ap->time_base;
     st->codec->bit_rate = x11grab->frame_size * 1/av_q2d(ap->time_base) * 8;
 
-    return 0;
+out:
+    return ret;
 }
 
 /**

From 724a900c454f7b41066edcc0443bff083d59f81c Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 07:43:01 +0200
Subject: [PATCH 359/830] x11grab: add video_size private option.

---
 libavdevice/x11grab.c | 48 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c
index 2e1f019584..d4fcbca873 100644
--- a/libavdevice/x11grab.c
+++ b/libavdevice/x11grab.c
@@ -37,6 +37,9 @@
 
 #include "config.h"
 #include "libavformat/avformat.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
 #include <time.h>
 #include <X11/X.h>
 #include <X11/Xlib.h>
@@ -52,10 +55,12 @@
  */
 struct x11_grab
 {
+    const AVClass *class;    /**< Class for private options. */
     int frame_size;          /**< Size in bytes of a grabbed frame */
     AVRational time_base;    /**< Time base */
     int64_t time_frame;      /**< Current time */
 
+    char *video_size;        /**< String describing video size, set by a private option. */
     int height;              /**< Height of the grab frame */
     int width;               /**< Width of the grab frame */
     int x_off;               /**< Horizontal top-left corner coordinate */
@@ -101,7 +106,18 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         *offset= 0;
     }
 
-    av_log(s1, AV_LOG_INFO, "device: %s -> display: %s x: %d y: %d width: %d height: %d\n", s1->filename, param, x_off, y_off, ap->width, ap->height);
+    if ((ret = av_parse_video_size(&x11grab->width, &x11grab->height, x11grab->video_size)) < 0) {
+        av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
+        goto out;
+    }
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->width > 0)
+        x11grab->width = ap->width;
+    if (ap->height > 0)
+        x11grab->height = ap->height;
+#endif
+    av_log(s1, AV_LOG_INFO, "device: %s -> display: %s x: %d y: %d width: %d height: %d\n",
+           s1->filename, param, x_off, y_off, x11grab->width, x11grab->height);
 
     dpy = XOpenDisplay(param);
     if(!dpy) {
@@ -110,7 +126,7 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         goto out;
     }
 
-    if (ap->width <= 0 || ap->height <= 0 || ap->time_base.den <= 0) {
+    if (ap->time_base.den <= 0) {
         av_log(s1, AV_LOG_ERROR, "AVParameters don't have video size and/or rate. Use -s and -r.\n");
         ret = AVERROR(EINVAL);
         goto out;
@@ -134,7 +150,7 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
                                 ZPixmap,
                                 NULL,
                                 &x11grab->shminfo,
-                                ap->width, ap->height);
+                                x11grab->width, x11grab->height);
         x11grab->shminfo.shmid = shmget(IPC_PRIVATE,
                                         image->bytes_per_line * image->height,
                                         IPC_CREAT|0777);
@@ -155,7 +171,7 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     } else {
         image = XGetImage(dpy, RootWindow(dpy, DefaultScreen(dpy)),
                           x_off,y_off,
-                          ap->width,ap->height,
+                          x11grab->width, x11grab->height,
                           AllPlanes, ZPixmap);
     }
 
@@ -222,10 +238,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         goto out;
     }
 
-    x11grab->frame_size = ap->width * ap->height * image->bits_per_pixel/8;
+    x11grab->frame_size = x11grab->width * x11grab->height * image->bits_per_pixel/8;
     x11grab->dpy = dpy;
-    x11grab->width = ap->width;
-    x11grab->height = ap->height;
     x11grab->time_base  = ap->time_base;
     x11grab->time_frame = av_gettime() / av_q2d(ap->time_base);
     x11grab->x_off = x_off;
@@ -235,13 +249,14 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 
     st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
     st->codec->codec_id = CODEC_ID_RAWVIDEO;
-    st->codec->width = ap->width;
-    st->codec->height = ap->height;
+    st->codec->width  = x11grab->width;
+    st->codec->height = x11grab->height;
     st->codec->pix_fmt = input_pixfmt;
     st->codec->time_base = ap->time_base;
     st->codec->bit_rate = x11grab->frame_size * 1/av_q2d(ap->time_base) * 8;
 
 out:
+    av_freep(&x11grab->video_size);
     return ret;
 }
 
@@ -449,6 +464,20 @@ x11grab_read_close(AVFormatContext *s1)
     return 0;
 }
 
+#define OFFSET(x) offsetof(struct x11_grab, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "vga"}, 0, 0, DEC },
+    { NULL },
+};
+
+static const AVClass x11_class = {
+    .class_name = "X11grab indev",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 /** x11 grabber device demuxer declaration */
 AVInputFormat ff_x11_grab_device_demuxer =
 {
@@ -460,4 +489,5 @@ AVInputFormat ff_x11_grab_device_demuxer =
     x11grab_read_packet,
     x11grab_read_close,
     .flags = AVFMT_NOFILE,
+    .priv_class = &x11_class,
 };

From 973f686a6c4f7c3b9120a1e22cb7c0159ea9aee2 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 07:43:01 +0200
Subject: [PATCH 360/830] rawdec: add video_size private option.

---
 libavformat/rawdec.c      | 40 ++++++++++++++++++++++++++++++++++++---
 libavformat/rawdec.h      |  6 ++++++
 libavformat/rawvideodec.c |  3 ++-
 3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index b545dbd6d7..265822b2da 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -24,6 +24,7 @@
 #include "avio_internal.h"
 #include "rawdec.h"
 #include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
 
 /* raw input */
 int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
@@ -66,17 +67,34 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
             av_set_pts_info(st, 64, 1, st->codec->sample_rate);
             break;
             }
-        case AVMEDIA_TYPE_VIDEO:
+        case AVMEDIA_TYPE_VIDEO: {
+            FFRawVideoDemuxerContext *s1 = s->priv_data;
+            int width = 0, height = 0, ret;
             if(ap->time_base.num)
                 av_set_pts_info(st, 64, ap->time_base.num, ap->time_base.den);
             else
                 av_set_pts_info(st, 64, 1, 25);
-            st->codec->width = ap->width;
-            st->codec->height = ap->height;
+            if (s1->video_size) {
+                ret = av_parse_video_size(&width, &height, s1->video_size);
+                av_freep(&s1->video_size);
+                if (ret < 0) {
+                    av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n");
+                    return ret;
+                }
+            }
+#if FF_API_FORMAT_PARAMETERS
+            if (ap->width > 0)
+                width = ap->width;
+            if (ap->height > 0)
+                height = ap->height;
+#endif
+            st->codec->width  = width;
+            st->codec->height = height;
             st->codec->pix_fmt = ap->pix_fmt;
             if(st->codec->pix_fmt == PIX_FMT_NONE)
                 st->codec->pix_fmt= PIX_FMT_YUV420P;
             break;
+            }
         default:
             return -1;
         }
@@ -165,6 +183,22 @@ const AVClass ff_rawaudio_demuxer_class = {
     .version        = LIBAVUTIL_VERSION_INT,
 };
 
+#define OFFSET(x) offsetof(FFRawVideoDemuxerContext, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
+static const AVOption video_options[] = {
+    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { NULL },
+};
+#undef OFFSET
+#undef DEC
+
+const AVClass ff_rawvideo_demuxer_class = {
+    .class_name     = "rawvideo demuxer",
+    .item_name      = av_default_item_name,
+    .option         = video_options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
 #if CONFIG_G722_DEMUXER
 AVInputFormat ff_g722_demuxer = {
     "g722",
diff --git a/libavformat/rawdec.h b/libavformat/rawdec.h
index e473eb2aac..517efd4042 100644
--- a/libavformat/rawdec.h
+++ b/libavformat/rawdec.h
@@ -31,7 +31,13 @@ typedef struct RawAudioDemuxerContext {
     int channels;
 } RawAudioDemuxerContext;
 
+typedef struct FFRawVideoDemuxerContext {
+    const AVClass *class;     /**< Class for private options. */
+    char *video_size;         /**< String describing video size, set by a private option. */
+} FFRawVideoDemuxerContext;
+
 extern const AVClass ff_rawaudio_demuxer_class;
+extern const AVClass ff_rawvideo_demuxer_class;
 
 int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap);
 
diff --git a/libavformat/rawvideodec.c b/libavformat/rawvideodec.c
index a29f7da7a4..f8d9b65f36 100644
--- a/libavformat/rawvideodec.c
+++ b/libavformat/rawvideodec.c
@@ -47,11 +47,12 @@ static int rawvideo_read_packet(AVFormatContext *s, AVPacket *pkt)
 AVInputFormat ff_rawvideo_demuxer = {
     "rawvideo",
     NULL_IF_CONFIG_SMALL("raw video format"),
-    0,
+    sizeof(FFRawVideoDemuxerContext),
     NULL,
     ff_raw_read_header,
     rawvideo_read_packet,
     .flags= AVFMT_GENERIC_INDEX,
     .extensions = "yuv,cif,qcif,rgb",
     .value = CODEC_ID_RAWVIDEO,
+    .priv_class = &ff_rawvideo_demuxer_class,
 };

From 06d8c9e5f0d9db605830d36678ccefeceddce610 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 07:43:01 +0200
Subject: [PATCH 361/830] tty: add video_size private option.

---
 libavformat/tty.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/libavformat/tty.c b/libavformat/tty.c
index bc6058d857..432fcc0c3d 100644
--- a/libavformat/tty.c
+++ b/libavformat/tty.c
@@ -28,6 +28,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
 #include "avformat.h"
 #include "sauce.h"
 
@@ -35,6 +36,7 @@ typedef struct {
     AVClass *class;
     int chars_per_frame;
     uint64_t fsize;  /**< file size less metadata buffer */
+    char *video_size;/**< A string describing video size, set by a private option. */
 } TtyDemuxContext;
 
 /**
@@ -71,14 +73,30 @@ static int read_header(AVFormatContext *avctx,
                        AVFormatParameters *ap)
 {
     TtyDemuxContext *s = avctx->priv_data;
+    int width = 0, height = 0, ret;
     AVStream *st = av_new_stream(avctx, 0);
     if (!st)
         return AVERROR(ENOMEM);
     st->codec->codec_tag   = 0;
     st->codec->codec_type  = AVMEDIA_TYPE_VIDEO;
     st->codec->codec_id    = CODEC_ID_ANSI;
-    if (ap->width)  st->codec->width  = ap->width;
-    if (ap->height) st->codec->height = ap->height;
+
+    if (s->video_size) {
+        ret = av_parse_video_size(&width, &height, s->video_size);
+        av_freep(&s->video_size);
+        if (ret < 0) {
+            av_log (avctx, AV_LOG_ERROR, "Couldn't parse video size.\n");
+            return ret;
+        }
+    }
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->width > 0)
+        width = ap->width;
+    if (ap->height > 0)
+        height = ap->height;
+#endif
+    st->codec->width  = width;
+    st->codec->height = height;
 
     if (!ap->time_base.num) {
         av_set_pts_info(st, 60, 1, 25);
@@ -129,8 +147,11 @@ static int read_packet(AVFormatContext *avctx, AVPacket *pkt)
     return 0;
 }
 
+#define OFFSET(x) offsetof(TtyDemuxContext, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "chars_per_frame", "", offsetof(TtyDemuxContext, chars_per_frame), FF_OPT_TYPE_INT, {.dbl = 6000}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM},
+    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { NULL },
 };
 

From 89d1b7f40671e0d455fe6b6670bf6f2bcf2eea2e Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 08:14:03 +0200
Subject: [PATCH 362/830] lavf: deprecate AVFormatParameters.width/height.

---
 libavformat/avformat.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 3a652a354b..847e408641 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -231,9 +231,9 @@ typedef struct AVFormatParameters {
 #if FF_API_FORMAT_PARAMETERS
     attribute_deprecated int sample_rate;
     attribute_deprecated int channels;
+    attribute_deprecated int width;
+    attribute_deprecated int height;
 #endif
-    int width;
-    int height;
     enum PixelFormat pix_fmt;
 #if FF_API_FORMAT_PARAMETERS
     attribute_deprecated int channel; /**< Used to select DV channel. */

From 4779f59378d54f30644ef79ce3a5c402546f3cb9 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 08:44:10 +0200
Subject: [PATCH 363/830] rtspdec: add initial_pause private option.

Deprecate corresponding AVFormatParameters field.
---
 libavformat/avformat.h |  4 ++--
 libavformat/rtsp.h     |  8 ++++++++
 libavformat/rtspdec.c  | 21 ++++++++++++++++++++-
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 847e408641..2ea940c186 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -241,9 +241,9 @@ typedef struct AVFormatParameters {
     attribute_deprecated unsigned int mpeg2ts_raw:1;  /**< deprecated, use mpegtsraw demuxer */
     /**< deprecated, use mpegtsraw demuxer-specific options instead */
     attribute_deprecated unsigned int mpeg2ts_compute_pcr:1;
+    attribute_deprecated unsigned int initial_pause:1;       /**< Do not begin to play the stream
+                                                                  immediately (RTSP only). */
 #endif
-    unsigned int initial_pause:1;       /**< Do not begin to play the stream
-                                            immediately (RTSP only). */
     unsigned int prealloced_context:1;
 } AVFormatParameters;
 
diff --git a/libavformat/rtsp.h b/libavformat/rtsp.h
index ff66502626..f5a7fada21 100644
--- a/libavformat/rtsp.h
+++ b/libavformat/rtsp.h
@@ -28,6 +28,8 @@
 #include "network.h"
 #include "httpauth.h"
 
+#include "libavutil/log.h"
+
 /**
  * Network layer over which RTP/etc packet data will be transported.
  */
@@ -196,6 +198,7 @@ enum RTSPServerType {
  * @todo Use AVIOContext instead of URLContext
  */
 typedef struct RTSPState {
+    const AVClass *class;             /**< Class for private options. */
     URLContext *rtsp_hd; /* RTSP TCP connection handle */
 
     /** number of items in the 'rtsp_streams' variable */
@@ -336,6 +339,11 @@ typedef struct RTSPState {
      * Whether the server supports the GET_PARAMETER method.
      */
     int get_parameter_supported;
+
+    /**
+     * Do not begin to play the stream immediately.
+     */
+    int initial_pause;
 } RTSPState;
 
 /**
diff --git a/libavformat/rtspdec.c b/libavformat/rtspdec.c
index ccfc4d8e27..fa6bc05517 100644
--- a/libavformat/rtspdec.c
+++ b/libavformat/rtspdec.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
 #include "avformat.h"
 
 #include "internal.h"
@@ -165,7 +166,12 @@ static int rtsp_read_header(AVFormatContext *s,
         return AVERROR(ENOMEM);
     rt->real_setup = rt->real_setup_cache + s->nb_streams;
 
-    if (ap->initial_pause) {
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->initial_pause)
+        rt->initial_pause = ap->initial_pause;
+#endif
+
+    if (rt->initial_pause) {
          /* do not start immediately */
     } else {
          if (rtsp_read_play(s) < 0) {
@@ -399,6 +405,18 @@ static int rtsp_read_close(AVFormatContext *s)
     return 0;
 }
 
+static const AVOption options[] = {
+    { "initial_pause",  "Don't start playing the stream immediately", offsetof(RTSPState, initial_pause),  FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+const AVClass rtsp_demuxer_class = {
+    .class_name     = "RTSP demuxer",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_rtsp_demuxer = {
     "rtsp",
     NULL_IF_CONFIG_SMALL("RTSP input format"),
@@ -411,4 +429,5 @@ AVInputFormat ff_rtsp_demuxer = {
     .flags = AVFMT_NOFILE,
     .read_play = rtsp_read_play,
     .read_pause = rtsp_read_pause,
+    .priv_class = &rtsp_demuxer_class,
 };

From fa4924a35818b2564050119ed7c14cbdd2b56065 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Wed, 25 May 2011 08:14:13 +0200
Subject: [PATCH 364/830] rawdec: factor video demuxer definitions into a
 macro.

---
 libavformat/cavsvideodec.c | 11 +----------
 libavformat/diracdec.c     | 11 +----------
 libavformat/dnxhddec.c     | 11 +----------
 libavformat/h261dec.c      | 12 +-----------
 libavformat/h263dec.c      | 12 +-----------
 libavformat/h264dec.c      | 12 +-----------
 libavformat/m4vdec.c       | 12 +-----------
 libavformat/mpegvideodec.c | 11 +----------
 libavformat/rawdec.c       | 23 ++---------------------
 libavformat/rawdec.h       | 12 ++++++++++++
 10 files changed, 22 insertions(+), 105 deletions(-)

diff --git a/libavformat/cavsvideodec.c b/libavformat/cavsvideodec.c
index f58c56f6b6..9aa9413f4c 100644
--- a/libavformat/cavsvideodec.c
+++ b/libavformat/cavsvideodec.c
@@ -65,13 +65,4 @@ static int cavsvideo_probe(AVProbeData *p)
     return 0;
 }
 
-AVInputFormat ff_cavsvideo_demuxer = {
-    "cavsvideo",
-    NULL_IF_CONFIG_SMALL("raw Chinese AVS video"),
-    0,
-    cavsvideo_probe,
-    ff_raw_video_read_header,
-    ff_raw_read_partial_packet,
-    .flags= AVFMT_GENERIC_INDEX,
-    .value = CODEC_ID_CAVS,
-};
+FF_DEF_RAWVIDEO_DEMUXER(cavsvideo, "raw Chinese AVS video", cavsvideo_probe, NULL, CODEC_ID_CAVS)
diff --git a/libavformat/diracdec.c b/libavformat/diracdec.c
index b0cb3bfc25..8cbd5b5146 100644
--- a/libavformat/diracdec.c
+++ b/libavformat/diracdec.c
@@ -31,13 +31,4 @@ static int dirac_probe(AVProbeData *p)
         return 0;
 }
 
-AVInputFormat ff_dirac_demuxer = {
-    "dirac",
-    NULL_IF_CONFIG_SMALL("raw Dirac"),
-    0,
-    dirac_probe,
-    ff_raw_video_read_header,
-    ff_raw_read_partial_packet,
-    .flags= AVFMT_GENERIC_INDEX,
-    .value = CODEC_ID_DIRAC,
-};
+FF_DEF_RAWVIDEO_DEMUXER(dirac, "raw Dirac", dirac_probe, NULL, CODEC_ID_DIRAC)
diff --git a/libavformat/dnxhddec.c b/libavformat/dnxhddec.c
index df5d2e3867..2aa8017ad4 100644
--- a/libavformat/dnxhddec.c
+++ b/libavformat/dnxhddec.c
@@ -42,13 +42,4 @@ static int dnxhd_probe(AVProbeData *p)
     return AVPROBE_SCORE_MAX;
 }
 
-AVInputFormat ff_dnxhd_demuxer = {
-    "dnxhd",
-    NULL_IF_CONFIG_SMALL("raw DNxHD (SMPTE VC-3)"),
-    0,
-    dnxhd_probe,
-    ff_raw_video_read_header,
-    ff_raw_read_partial_packet,
-    .flags= AVFMT_GENERIC_INDEX,
-    .value = CODEC_ID_DNXHD,
-};
+FF_DEF_RAWVIDEO_DEMUXER(dnxhd, "raw DNxHD (SMPTE VC-3)", dnxhd_probe, NULL, CODEC_ID_DNXHD)
diff --git a/libavformat/h261dec.c b/libavformat/h261dec.c
index 8d9c5fa8e5..1b416d4fc7 100644
--- a/libavformat/h261dec.c
+++ b/libavformat/h261dec.c
@@ -62,14 +62,4 @@ static int h261_probe(AVProbeData *p)
     return 0;
 }
 
-AVInputFormat ff_h261_demuxer = {
-    "h261",
-    NULL_IF_CONFIG_SMALL("raw H.261"),
-    0,
-    h261_probe,
-    ff_raw_video_read_header,
-    ff_raw_read_partial_packet,
-    .flags= AVFMT_GENERIC_INDEX,
-    .extensions = "h261",
-    .value = CODEC_ID_H261,
-};
+FF_DEF_RAWVIDEO_DEMUXER(h261, "raw H.261", h261_probe, "h261", CODEC_ID_H261)
diff --git a/libavformat/h263dec.c b/libavformat/h263dec.c
index 5eda7afd60..b9185cbcb6 100644
--- a/libavformat/h263dec.c
+++ b/libavformat/h263dec.c
@@ -64,14 +64,4 @@ static int h263_probe(AVProbeData *p)
     return 0;
 }
 
-AVInputFormat ff_h263_demuxer = {
-    "h263",
-    NULL_IF_CONFIG_SMALL("raw H.263"),
-    0,
-    h263_probe,
-    ff_raw_video_read_header,
-    ff_raw_read_partial_packet,
-    .flags= AVFMT_GENERIC_INDEX,
-//    .extensions = "h263", //FIXME remove after writing mpeg4_probe
-    .value = CODEC_ID_H263,
-};
+FF_DEF_RAWVIDEO_DEMUXER(h263, "raw H.263", h263_probe, NULL, CODEC_ID_H263)
diff --git a/libavformat/h264dec.c b/libavformat/h264dec.c
index 268492cf8d..f9086476d5 100644
--- a/libavformat/h264dec.c
+++ b/libavformat/h264dec.c
@@ -67,14 +67,4 @@ static int h264_probe(AVProbeData *p)
     return 0;
 }
 
-AVInputFormat ff_h264_demuxer = {
-    "h264",
-    NULL_IF_CONFIG_SMALL("raw H.264 video format"),
-    0,
-    h264_probe,
-    ff_raw_video_read_header,
-    ff_raw_read_partial_packet,
-    .flags= AVFMT_GENERIC_INDEX,
-    .extensions = "h26l,h264,264", //FIXME remove after writing mpeg4_probe
-    .value = CODEC_ID_H264,
-};
+FF_DEF_RAWVIDEO_DEMUXER(h264 , "raw H.264 video format", h264_probe, "h26l,h264,264", CODEC_ID_H264)
diff --git a/libavformat/m4vdec.c b/libavformat/m4vdec.c
index 3463901d7b..e856aadc10 100644
--- a/libavformat/m4vdec.c
+++ b/libavformat/m4vdec.c
@@ -49,14 +49,4 @@ static int mpeg4video_probe(AVProbeData *probe_packet)
     return 0;
 }
 
-AVInputFormat ff_m4v_demuxer = {
-    "m4v",
-    NULL_IF_CONFIG_SMALL("raw MPEG-4 video format"),
-    0,
-    mpeg4video_probe, /** probing for MPEG-4 data */
-    ff_raw_video_read_header,
-    ff_raw_read_partial_packet,
-    .flags= AVFMT_GENERIC_INDEX,
-    .extensions = "m4v",
-    .value = CODEC_ID_MPEG4,
-};
+FF_DEF_RAWVIDEO_DEMUXER(m4v, "raw MPEG-4 video format", mpeg4video_probe, "m4v", CODEC_ID_MPEG4)
diff --git a/libavformat/mpegvideodec.c b/libavformat/mpegvideodec.c
index 0669820380..9fea117632 100644
--- a/libavformat/mpegvideodec.c
+++ b/libavformat/mpegvideodec.c
@@ -55,13 +55,4 @@ static int mpegvideo_probe(AVProbeData *p)
     return 0;
 }
 
-AVInputFormat ff_mpegvideo_demuxer = {
-    "mpegvideo",
-    NULL_IF_CONFIG_SMALL("raw MPEG video"),
-    0,
-    mpegvideo_probe,
-    ff_raw_video_read_header,
-    ff_raw_read_partial_packet,
-    .flags= AVFMT_GENERIC_INDEX,
-    .value = CODEC_ID_MPEG1VIDEO,
-};
+FF_DEF_RAWVIDEO_DEMUXER(mpegvideo, "raw MPEG video", mpegvideo_probe, NULL, CODEC_ID_MPEG1VIDEO)
diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index 265822b2da..434ed48f05 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -229,17 +229,7 @@ AVInputFormat ff_gsm_demuxer = {
 #endif
 
 #if CONFIG_MJPEG_DEMUXER
-AVInputFormat ff_mjpeg_demuxer = {
-    "mjpeg",
-    NULL_IF_CONFIG_SMALL("raw MJPEG video"),
-    0,
-    NULL,
-    ff_raw_video_read_header,
-    ff_raw_read_partial_packet,
-    .flags= AVFMT_GENERIC_INDEX,
-    .extensions = "mjpg,mjpeg",
-    .value = CODEC_ID_MJPEG,
-};
+FF_DEF_RAWVIDEO_DEMUXER(mjpeg, "raw MJPEG video", NULL, "mjpg,mjpeg", CODEC_ID_MJPEG)
 #endif
 
 #if CONFIG_MLP_DEMUXER
@@ -285,14 +275,5 @@ AVInputFormat ff_shorten_demuxer = {
 #endif
 
 #if CONFIG_VC1_DEMUXER
-AVInputFormat ff_vc1_demuxer = {
-    "vc1",
-    NULL_IF_CONFIG_SMALL("raw VC-1"),
-    0,
-    NULL /* vc1_probe */,
-    ff_raw_video_read_header,
-    ff_raw_read_partial_packet,
-    .extensions = "vc1",
-    .value = CODEC_ID_VC1,
-};
+FF_DEF_RAWVIDEO_DEMUXER(vc1, "raw VC-1", NULL, "vc1", CODEC_ID_VC1)
 #endif
diff --git a/libavformat/rawdec.h b/libavformat/rawdec.h
index 517efd4042..99beadd0e1 100644
--- a/libavformat/rawdec.h
+++ b/libavformat/rawdec.h
@@ -47,4 +47,16 @@ int ff_raw_audio_read_header(AVFormatContext *s, AVFormatParameters *ap);
 
 int ff_raw_video_read_header(AVFormatContext *s, AVFormatParameters *ap);
 
+#define FF_DEF_RAWVIDEO_DEMUXER(shortname, longname, probe, ext, id)\
+AVInputFormat ff_ ## shortname ## _demuxer = {\
+    .name           = #shortname,\
+    .long_name      = NULL_IF_CONFIG_SMALL(longname),\
+    .read_probe     = probe,\
+    .read_header    = ff_raw_video_read_header,\
+    .read_packet    = ff_raw_read_partial_packet,\
+    .extensions     = ext,\
+    .flags          = AVFMT_GENERIC_INDEX,\
+    .value          = id,\
+};
+
 #endif /* AVFORMAT_RAWDEC_H */

From ebb6b27ad9a72de2382a4eb438a11043273c4cad Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Fri, 20 May 2011 14:09:21 +0200
Subject: [PATCH 365/830] tcp: make connect() timeout properly

The connect() timeout can take minutes, gets misreported as EIO and
isn't interruptible.
---
 libavformat/tcp.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/libavformat/tcp.c b/libavformat/tcp.c
index ced103876f..e602a556d2 100644
--- a/libavformat/tcp.c
+++ b/libavformat/tcp.c
@@ -45,6 +45,7 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
     char buf[256];
     int ret;
     socklen_t optlen;
+    int timeout = 100;
     char hostname[1024],proto[1024],path[1024];
     char portstr[10];
 
@@ -57,6 +58,9 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
     if (p) {
         if (av_find_info_tag(buf, sizeof(buf), "listen", p))
             listen_socket = 1;
+        if (av_find_info_tag(buf, sizeof(buf), "timeout", p)) {
+            timeout = strtol(buf, NULL, 10);
+        }
     }
     memset(&hints, 0, sizeof(hints));
     hints.ai_family = AF_UNSPEC;
@@ -73,6 +77,7 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
     cur_ai = ai;
 
  restart:
+    ret = AVERROR(EIO);
     fd = socket(cur_ai->ai_family, cur_ai->ai_socktype, cur_ai->ai_protocol);
     if (fd < 0)
         goto fail;
@@ -84,28 +89,29 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
         fd1 = accept(fd, NULL, NULL);
         closesocket(fd);
         fd = fd1;
+        ff_socket_nonblock(fd, 1);
     } else {
  redo:
+        ff_socket_nonblock(fd, 1);
         ret = connect(fd, cur_ai->ai_addr, cur_ai->ai_addrlen);
     }
 
-    ff_socket_nonblock(fd, 1);
-
     if (ret < 0) {
         struct pollfd p = {fd, POLLOUT, 0};
-        if (ff_neterrno() == AVERROR(EINTR)) {
+        ret = ff_neterrno();
+        if (ret == AVERROR(EINTR)) {
             if (url_interrupt_cb()) {
                 ret = AVERROR_EXIT;
                 goto fail1;
             }
             goto redo;
         }
-        if (ff_neterrno() != AVERROR(EINPROGRESS) &&
-            ff_neterrno() != AVERROR(EAGAIN))
+        if (ret != AVERROR(EINPROGRESS) &&
+            ret != AVERROR(EAGAIN))
             goto fail;
 
         /* wait until we are connected or until abort */
-        for(;;) {
+        while(timeout--) {
             if (url_interrupt_cb()) {
                 ret = AVERROR_EXIT;
                 goto fail1;
@@ -114,7 +120,10 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
             if (ret > 0)
                 break;
         }
-
+        if (ret <= 0) {
+            ret = AVERROR(ETIMEDOUT);
+            goto fail;
+        }
         /* test error */
         optlen = sizeof(ret);
         getsockopt (fd, SOL_SOCKET, SO_ERROR, &ret, &optlen);
@@ -122,6 +131,7 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
             av_log(h, AV_LOG_ERROR,
                    "TCP connection to %s:%d failed: %s\n",
                    hostname, port, strerror(ret));
+            ret = AVERROR(ret);
             goto fail;
         }
     }
@@ -144,7 +154,6 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
             closesocket(fd);
         goto restart;
     }
-    ret = AVERROR(EIO);
  fail1:
     if (fd >= 0)
         closesocket(fd);

From 6348a96c06ca152f32fddac58552dd679d39eddf Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Thu, 26 May 2011 20:03:37 +0200
Subject: [PATCH 366/830] configure: report yasm/nasm presence properly

If the secondary assembler is in use, report its proper name.
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index d3e8764fb9..4916bad18a 100755
--- a/configure
+++ b/configure
@@ -3103,7 +3103,7 @@ fi
 echo "big-endian                ${bigendian-no}"
 echo "runtime cpu detection     ${runtime_cpudetect-no}"
 if enabled x86; then
-    echo "yasm                      ${yasm-no}"
+    echo "${yasmexe}                      ${yasm-no}"
     echo "MMX enabled               ${mmx-no}"
     echo "MMX2 enabled              ${mmx2-no}"
     echo "3DNow! enabled            ${amd3dnow-no}"

From ec76ef5ae251f82ab1080abb83ed7d619f2a002e Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 26 May 2011 16:02:55 +0200
Subject: [PATCH 367/830] fate.txt: replace FATE rsync command with a make
 command

Remove reference to the mplayerhq.hu rsync command, and replace it
with a reference to the make fate-rsync command.

rsync still needs to be enabled on mplayerhq.hu, and it is currently
not working. Also, the fate-rsync Makefile target can be easily updated
without the need to keep the docs in sync.
---
 doc/fate.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/fate.txt b/doc/fate.txt
index f8ce68ea77..6ca302c024 100644
--- a/doc/fate.txt
+++ b/doc/fate.txt
@@ -7,8 +7,7 @@ that is provided separately from the actual source distribution.
 
 Use the following command to get the fate test samples
 
-# rsync -aL rsync://rsync.mplayerhq.hu:/samples/fate-suite/ fate/fate-suite
-# rsync -aL rsync://fate-suite.libav.org:/fate-suite/ fate-suite
+# make fate-rsync SAMPLES=fate-suite/
 
 To inform the build system about the testsuite location, pass
 `--samples=<path to the samples>` to configure or set the SAMPLES Make

From e48993e3cf532a1713562a147c244b267f5ab713 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 27 May 2011 10:52:34 +0200
Subject: [PATCH 368/830] lavdev: include libavformat/avformat.h in avdevice.h

The header is always required for files which directly deal with
devices, since libavdevice uses the AVFormat* structures defined in
avformat.h.

Avoid the need to explicitly add libavformat/avformat.h.
---
 libavdevice/avdevice.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavdevice/avdevice.h b/libavdevice/avdevice.h
index 7633af4475..a31c080f9c 100644
--- a/libavdevice/avdevice.h
+++ b/libavdevice/avdevice.h
@@ -20,6 +20,7 @@
 #define AVDEVICE_AVDEVICE_H
 
 #include "libavutil/avutil.h"
+#include "libavformat/avformat.h"
 
 #define LIBAVDEVICE_VERSION_MAJOR 53
 #define LIBAVDEVICE_VERSION_MINOR  0

From 6b899e16de94c05203008d969523a642147a5e4b Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 26 May 2011 19:11:25 +0200
Subject: [PATCH 369/830] lavdev: prefer the inclusion of avdevice.h over that
 of libavformat/avformat.h

---
 libavdevice/alldevices.c        | 1 -
 libavdevice/alsa-audio-common.c | 2 +-
 libavdevice/alsa-audio-dec.c    | 2 +-
 libavdevice/alsa-audio-enc.c    | 2 +-
 libavdevice/alsa-audio.h        | 2 +-
 libavdevice/bktr.c              | 2 +-
 libavdevice/dshow.c             | 2 +-
 libavdevice/dshow.h             | 2 +-
 libavdevice/dv1394.c            | 2 +-
 libavdevice/fbdev.c             | 2 +-
 libavdevice/jack_audio.c        | 2 +-
 libavdevice/libdc1394.c         | 2 +-
 libavdevice/oss_audio.c         | 2 +-
 libavdevice/sndio_common.c      | 2 +-
 libavdevice/sndio_common.h      | 2 +-
 libavdevice/sndio_enc.c         | 3 +--
 libavdevice/v4l.c               | 2 +-
 libavdevice/v4l2.c              | 2 +-
 libavdevice/vfwcap.c            | 2 +-
 libavdevice/x11grab.c           | 2 +-
 20 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c
index fad7758944..3021f08bda 100644
--- a/libavdevice/alldevices.c
+++ b/libavdevice/alldevices.c
@@ -19,7 +19,6 @@
  */
 
 #include "config.h"
-#include "libavformat/avformat.h"
 #include "avdevice.h"
 
 #define REGISTER_OUTDEV(X,x) { \
diff --git a/libavdevice/alsa-audio-common.c b/libavdevice/alsa-audio-common.c
index c00e973e1a..38466a06ce 100644
--- a/libavdevice/alsa-audio-common.c
+++ b/libavdevice/alsa-audio-common.c
@@ -29,7 +29,7 @@
  */
 
 #include <alsa/asoundlib.h>
-#include "libavformat/avformat.h"
+#include "avdevice.h"
 
 #include "alsa-audio.h"
 
diff --git a/libavdevice/alsa-audio-dec.c b/libavdevice/alsa-audio-dec.c
index 24abc7c187..2424c022d3 100644
--- a/libavdevice/alsa-audio-dec.c
+++ b/libavdevice/alsa-audio-dec.c
@@ -46,9 +46,9 @@
  */
 
 #include <alsa/asoundlib.h>
-#include "libavformat/avformat.h"
 #include "libavutil/opt.h"
 
+#include "avdevice.h"
 #include "alsa-audio.h"
 
 static av_cold int audio_read_header(AVFormatContext *s1,
diff --git a/libavdevice/alsa-audio-enc.c b/libavdevice/alsa-audio-enc.c
index 0bc53b6f54..a53c1763d5 100644
--- a/libavdevice/alsa-audio-enc.c
+++ b/libavdevice/alsa-audio-enc.c
@@ -38,8 +38,8 @@
  */
 
 #include <alsa/asoundlib.h>
-#include "libavformat/avformat.h"
 
+#include "avdevice.h"
 #include "alsa-audio.h"
 
 static av_cold int audio_write_header(AVFormatContext *s1)
diff --git a/libavdevice/alsa-audio.h b/libavdevice/alsa-audio.h
index a7a000c18c..431401bb13 100644
--- a/libavdevice/alsa-audio.h
+++ b/libavdevice/alsa-audio.h
@@ -32,8 +32,8 @@
 
 #include <alsa/asoundlib.h>
 #include "config.h"
-#include "libavformat/avformat.h"
 #include "libavutil/log.h"
+#include "avdevice.h"
 
 /* XXX: we make the assumption that the soundcard accepts this format */
 /* XXX: find better solution with "preinit" method, needed also in
diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index 0e57258f14..7b0f1b7382 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -24,7 +24,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavformat/avformat.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #if HAVE_DEV_BKTR_IOCTL_METEOR_H && HAVE_DEV_BKTR_IOCTL_BT848_H
@@ -47,6 +46,7 @@
 #include <signal.h>
 #include <stdint.h>
 #include <strings.h>
+#include "avdevice.h"
 
 typedef struct {
     AVClass *class;
diff --git a/libavdevice/dshow.c b/libavdevice/dshow.c
index 901c766696..348fda6ecd 100644
--- a/libavdevice/dshow.c
+++ b/libavdevice/dshow.c
@@ -19,9 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavformat/avformat.h"
 #include "libavformat/timefilter.h"
 
+#include "avdevice.h"
 #include "dshow.h"
 
 struct dshow_ctx {
diff --git a/libavdevice/dshow.h b/libavdevice/dshow.h
index c991c02ecb..4e79680d98 100644
--- a/libavdevice/dshow.h
+++ b/libavdevice/dshow.h
@@ -21,7 +21,7 @@
 
 #define DSHOWDEBUG 0
 
-#include "libavformat/avformat.h"
+#include "avdevice.h"
 
 #define COBJMACROS
 #include <windows.h>
diff --git a/libavdevice/dv1394.c b/libavdevice/dv1394.c
index 7fa73bdf95..4a84383264 100644
--- a/libavdevice/dv1394.c
+++ b/libavdevice/dv1394.c
@@ -32,7 +32,7 @@
 
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
-#include "libavformat/avformat.h"
+#include "avdevice.h"
 
 #undef DV1394_DEBUG
 
diff --git a/libavdevice/fbdev.c b/libavdevice/fbdev.c
index 4f670e9875..19bf5ad5ef 100644
--- a/libavdevice/fbdev.c
+++ b/libavdevice/fbdev.c
@@ -39,7 +39,7 @@
 
 #include "libavutil/mem.h"
 #include "libavutil/pixdesc.h"
-#include "libavformat/avformat.h"
+#include "avdevice.h"
 
 struct rgb_pixfmt_map_entry {
     int bits_per_pixel;
diff --git a/libavdevice/jack_audio.c b/libavdevice/jack_audio.c
index 9062e7f2dd..f78a2c9164 100644
--- a/libavdevice/jack_audio.c
+++ b/libavdevice/jack_audio.c
@@ -27,8 +27,8 @@
 #include "libavutil/log.h"
 #include "libavutil/fifo.h"
 #include "libavcodec/avcodec.h"
-#include "libavformat/avformat.h"
 #include "libavformat/timefilter.h"
+#include "avdevice.h"
 
 /**
  * Size of the internal FIFO buffers as a number of audio packets
diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index b2585ac260..cf9e03233e 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -21,9 +21,9 @@
  */
 
 #include "config.h"
-#include "libavformat/avformat.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
+#include "avdevice.h"
 
 #if HAVE_LIBDC1394_2
 #include <dc1394/dc1394.h>
diff --git a/libavdevice/oss_audio.c b/libavdevice/oss_audio.c
index 66c303272e..2fde491a07 100644
--- a/libavdevice/oss_audio.c
+++ b/libavdevice/oss_audio.c
@@ -39,7 +39,7 @@
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #include "libavcodec/avcodec.h"
-#include "libavformat/avformat.h"
+#include "avdevice.h"
 
 #define AUDIO_BLOCK_SIZE 4096
 
diff --git a/libavdevice/sndio_common.c b/libavdevice/sndio_common.c
index 7e3b9a3eb9..048e72e8a2 100644
--- a/libavdevice/sndio_common.c
+++ b/libavdevice/sndio_common.c
@@ -22,7 +22,7 @@
 #include <stdint.h>
 #include <sndio.h>
 
-#include "libavformat/avformat.h"
+#include "avdevice.h"
 
 #include "sndio_common.h"
 
diff --git a/libavdevice/sndio_common.h b/libavdevice/sndio_common.h
index 1d0039564e..12218b4b24 100644
--- a/libavdevice/sndio_common.h
+++ b/libavdevice/sndio_common.h
@@ -25,8 +25,8 @@
 #include <stdint.h>
 #include <sndio.h>
 
-#include "libavformat/avformat.h"
 #include "libavutil/log.h"
+#include "avdevice.h"
 
 typedef struct {
     AVClass *class;
diff --git a/libavdevice/sndio_enc.c b/libavdevice/sndio_enc.c
index 30f19107cd..9ad5cad08a 100644
--- a/libavdevice/sndio_enc.c
+++ b/libavdevice/sndio_enc.c
@@ -22,8 +22,7 @@
 #include <stdint.h>
 #include <sndio.h>
 
-#include "libavformat/avformat.h"
-
+#include "avdevice.h"
 #include "sndio_common.h"
 
 static av_cold int audio_write_header(AVFormatContext *s1)
diff --git a/libavdevice/v4l.c b/libavdevice/v4l.c
index 337b489e92..28a4e94599 100644
--- a/libavdevice/v4l.c
+++ b/libavdevice/v4l.c
@@ -29,7 +29,6 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
-#include "libavformat/avformat.h"
 #include "libavcodec/dsputil.h"
 #include <unistd.h>
 #include <fcntl.h>
@@ -40,6 +39,7 @@
 #include <linux/videodev.h>
 #include <time.h>
 #include <strings.h>
+#include "avdevice.h"
 
 typedef struct {
     AVClass *class;
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index a0fc99242e..f219305672 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -29,7 +29,6 @@
 
 #undef __STRICT_ANSI__ //workaround due to broken kernel headers
 #include "config.h"
-#include "libavformat/avformat.h"
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/ioctl.h>
@@ -46,6 +45,7 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
+#include "avdevice.h"
 
 static const int desired_video_buffers = 256;
 
diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index b8ca419b40..b8b8f52deb 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c
@@ -19,9 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavformat/avformat.h"
 #include <windows.h>
 #include <vfw.h>
+#include "avdevice.h"
 
 //#define DEBUG_VFW
 
diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c
index 090af4930f..a41e11ab57 100644
--- a/libavdevice/x11grab.c
+++ b/libavdevice/x11grab.c
@@ -36,7 +36,6 @@
  */
 
 #include "config.h"
-#include "libavformat/avformat.h"
 #include <time.h>
 #include <X11/X.h>
 #include <X11/Xlib.h>
@@ -46,6 +45,7 @@
 #include <sys/shm.h>
 #include <X11/extensions/XShm.h>
 #include <X11/extensions/Xfixes.h>
+#include "avdevice.h"
 
 /**
  * X11 Device Demuxer context

From f642982c10dd2257a372e36c00c66b8a57954ae1 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 27 May 2011 11:50:38 +0200
Subject: [PATCH 370/830] patch checklist: suggest --disable-yasm test.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 doc/developer.texi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/developer.texi b/doc/developer.texi
index 4ff3f0380b..69c2951620 100644
--- a/doc/developer.texi
+++ b/doc/developer.texi
@@ -392,6 +392,8 @@ send a reminder by email. Your patch should eventually be dealt with.
     improves readability.
 @item
     Consider to add a regression test for your code.
+@item
+    If you added YASM code please check that things still work with --disable-yasm
 @end enumerate
 
 @section Patch review process

From 041dbd3c145046e1bfd35079d23c6843a2f703cb Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 27 May 2011 12:47:03 +0200
Subject: [PATCH 371/830] swscale: don't lose precision on RGB/BGR48 input,
 that is, don't drop half the bits.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale.c             | 132 +++++++++++--------------------
 libswscale/swscale_template.c    |  30 +++----
 tests/ref/lavfi/pixfmts_scale_le |   8 +-
 3 files changed, 63 insertions(+), 107 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 84926635c3..00941c9dfc 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1045,93 +1045,55 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
     }
 }
 
-static inline void rgb48ToY(int16_t *dst, const uint8_t *src, long width,
-                            uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int r = src[i*6+0];
-        int g = src[i*6+2];
-        int b = src[i*6+4];
-
-        dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
-    }
+#define RGB48(name, R, B, READ)\
+static inline void name ## ToY(int16_t *dst, const uint16_t *src, long width, uint32_t *unused)\
+{\
+    int i;\
+    for (i = 0; i < width; i++) {\
+        int r = READ(&src[i*3+R]);\
+        int g = READ(&src[i*3+1]);\
+        int b = READ(&src[i*3+B]);\
+\
+        dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);\
+    }\
+}\
+\
+static inline void name ## ToUV(int16_t *dstU, int16_t *dstV,\
+                             const uint16_t *src1, const uint16_t *src2,\
+                             long width, uint32_t *unused)\
+{\
+    int i;\
+    assert(src1==src2);\
+    for (i = 0; i < width; i++) {\
+        int r = READ(&src1[3*i + R]);\
+        int g = READ(&src1[3*i + 1]);\
+        int b = READ(&src1[3*i + B]);\
+\
+        dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);\
+        dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);\
+    }\
+}\
+\
+static inline void name ## ToUV_half(int16_t *dstU, int16_t *dstV,\
+                                  const uint16_t *src1, const uint16_t *src2,\
+                                  long width, uint32_t *unused)\
+{\
+    int i;\
+    assert(src1==src2);\
+    for (i = 0; i < width; i++) {\
+        int r= READ(&src1[6*i + R]) + READ(&src1[6*i + 3+R]);\
+        int g= READ(&src1[6*i + 1]) + READ(&src1[6*i + 4]);\
+        int b= READ(&src1[6*i + B]) + READ(&src1[6*i + 3+B]);\
+\
+        dstU[i]= (RU*r + GU*g + BU*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8);\
+        dstV[i]= (RV*r + GV*g + BV*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8);\
+    }\
 }
 
-static inline void rgb48ToUV(int16_t *dstU, int16_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
-                             long width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i = 0; i < width; i++) {
-        int r = src1[6*i + 0];
-        int g = src1[6*i + 2];
-        int b = src1[6*i + 4];
-
-        dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
-        dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
-    }
-}
-
-static inline void rgb48ToUV_half(int16_t *dstU, int16_t *dstV,
-                                  const uint8_t *src1, const uint8_t *src2,
-                                  long width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i = 0; i < width; i++) {
-        int r= src1[12*i + 0] + src1[12*i + 6];
-        int g= src1[12*i + 2] + src1[12*i + 8];
-        int b= src1[12*i + 4] + src1[12*i + 10];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
-        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
-    }
-}
-
-static inline void bgr48ToY(int16_t *dst, const uint8_t *src, long width,
-                            uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b = src[i*6+0];
-        int g = src[i*6+2];
-        int r = src[i*6+4];
-
-        dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
-    }
-}
-
-static inline void bgr48ToUV(int16_t *dstU, int16_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
-                             long width, uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b = src1[6*i + 0];
-        int g = src1[6*i + 2];
-        int r = src1[6*i + 4];
-
-        dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
-        dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
-    }
-}
-
-static inline void bgr48ToUV_half(int16_t *dstU, int16_t *dstV,
-                                  const uint8_t *src1, const uint8_t *src2,
-                                  long width, uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b= src1[12*i + 0] + src1[12*i + 6];
-        int g= src1[12*i + 2] + src1[12*i + 8];
-        int r= src1[12*i + 4] + src1[12*i + 10];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
-        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
-    }
-}
+RGB48(rgb48LE, 0, 2, AV_RL16)
+RGB48(rgb48BE, 0, 2, AV_RB16)
+RGB48(bgr48LE, 2, 0, AV_RL16)
+RGB48(bgr48BE, 2, 0, AV_RB16)
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
 static inline void name(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)\
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 8bf38b5946..5a84ceb871 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -886,10 +886,10 @@ static void sws_init_swScale_c(SwsContext *c)
     }
     if (c->chrSrcHSubSample) {
         switch(srcFormat) {
-        case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
-        case PIX_FMT_BGR48BE:
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
+        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half; break;
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half; break;
+        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half; break;
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half; break;
         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half;  break;
         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
@@ -903,10 +903,10 @@ static void sws_init_swScale_c(SwsContext *c)
         }
     } else {
         switch(srcFormat) {
-        case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
-        case PIX_FMT_BGR48BE:
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
+        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV; break;
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV; break;
+        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV; break;
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV; break;
         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV;  break;
         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
@@ -945,10 +945,10 @@ static void sws_init_swScale_c(SwsContext *c)
     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY;  break;
     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
-    case PIX_FMT_RGB48BE:
-    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
-    case PIX_FMT_BGR48BE:
-    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break;
+    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY; break;
+    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY; break;
+    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY; break;
+    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY; break;
     }
     if (c->alpPixBuf) {
         switch (srcFormat) {
@@ -972,12 +972,6 @@ static void sws_init_swScale_c(SwsContext *c)
     case PIX_FMT_BGR32  :
         c->alpSrcOffset = 3;
         break;
-    case PIX_FMT_RGB48LE:
-    case PIX_FMT_BGR48LE:
-        c->lumSrcOffset = 1;
-        c->chrSrcOffset = 1;
-        c->alpSrcOffset = 1;
-        break;
     }
 
     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
diff --git a/tests/ref/lavfi/pixfmts_scale_le b/tests/ref/lavfi/pixfmts_scale_le
index a66138a8a3..6e9ab9ae49 100644
--- a/tests/ref/lavfi/pixfmts_scale_le
+++ b/tests/ref/lavfi/pixfmts_scale_le
@@ -1,8 +1,8 @@
 abgr                cff82561a074874027ac1cc896fd2730
 argb                756dd1eaa5baca2238ce23dbdc452684
 bgr24               e44192347a45586c6c157e3059610cd1
-bgr48be             62e6043fbe9734e63ad679999ca8011c
-bgr48le             61237dad4fa5f3e9109db85f53cd25d9
+bgr48be             6d01b6ccd2ccf18c12985bcb2fde2218
+bgr48le             4caa6914091ad03b8f67c02d6b050bc0
 bgr4_byte           ee1d35a7baf8e9016891929a2f565c0b
 bgr555le            41e3e0961478dc634bf68a7bbd670cc9
 bgr565le            614897eaeb422bd9a972f8ee51909be5
@@ -16,8 +16,8 @@ monow               fd5d417ab7728acddffc06870661df61
 nv12                4676d59db43d657dc12841f6bc3ab452
 nv21                69c699510ff1fb777b118ebee1002f14
 rgb24               13ff53ebeab74dc05492836f1cfbd2c1
-rgb48be             7f6b1f8139c6a64eadf9dfa867ac20e8
-rgb48le             2756d8710c152cbc367656de4d0f1b76
+rgb48be             f82e99f13d5ede2a53cf3bf7178ca350
+rgb48le             3a09d89e4b27ea1a98f762e662e306a7
 rgb4_byte           d81ffd3add95842a618eec81024f0b5c
 rgb555le            bd698d86c03170c4a16607c0fd1f750f
 rgb565le            bfa0c639d80c3c03fd0c9e5f34296a5e

From 9f5d45025e8df9d5f39832caad16b94cb6ac11c5 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 27 May 2011 09:28:38 -0400
Subject: [PATCH 372/830] swscale: fix non-bitexact yuv2yuv[X2]() MMX/MMX2
 functions.

---
 libswscale/x86/swscale_template.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index fd59f763c5..a08ff6a0fb 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -59,7 +59,7 @@
         "psraw                               $3, %%mm3      \n\t"\
         "psraw                               $3, %%mm4      \n\t"\
         "packuswb                         %%mm4, %%mm3      \n\t"\
-        MOVNTQ(%%mm3, (%1, %%REGa))\
+        MOVNTQ(%%mm3, (%1, %3))\
         "add                                 $8, %3         \n\t"\
         "cmp                                 %2, %3         \n\t"\
         "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
@@ -81,8 +81,9 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                     uint8_t *aDest, long dstW, long chrDstW)
 {
     if (uDest) {
+        x86_reg uv_off = c->uv_off;
         YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
-        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off)
+        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
     }
     if (CONFIG_SWSCALE_ALPHA && aDest) {
         YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
@@ -137,7 +138,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "psraw                               $3, %%mm4      \n\t"\
         "psraw                               $3, %%mm6      \n\t"\
         "packuswb                         %%mm6, %%mm4      \n\t"\
-        MOVNTQ(%%mm4, (%1, %%REGa))\
+        MOVNTQ(%%mm4, (%1, %3))\
         "add                                 $8, %3         \n\t"\
         "cmp                                 %2, %3         \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
@@ -161,8 +162,9 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                        uint8_t *aDest, long dstW, long chrDstW)
 {
     if (uDest) {
+        x86_reg uv_off = c->uv_off;
         YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
-        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off)
+        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
     }
     if (CONFIG_SWSCALE_ALPHA && aDest) {
         YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)

From 87ababd7c5adb6d82a3a642b9b6e2223b3a4025b Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 27 May 2011 16:54:22 +0200
Subject: [PATCH 373/830] avopt: fix segfault

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavutil/opt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/opt.c b/libavutil/opt.c
index 8f2d9a69db..6b2bc7712d 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -45,7 +45,7 @@ const AVOption *av_find_opt(void *v, const char *name, const char *unit, int mas
 const AVOption *av_next_option(void *obj, const AVOption *last)
 {
     if (last && last[1].name) return ++last;
-    else if (last)            return NULL;
+    else if (last || !(*(AVClass**)obj)->option->name) return NULL;
     else                      return (*(AVClass**)obj)->option;
 }
 

From ea7e318fb2d7a7c2395d87c93d1d5f703166ac0d Mon Sep 17 00:00:00 2001
From: Martin Lambers <marlam@marlam.de>
Date: Wed, 25 May 2011 11:28:30 +0200
Subject: [PATCH 374/830] Remove support for libdc1394 < 2.0.

Versions >= 2.0 have been around for a very long time now.
---
 configure               |  12 +---
 libavdevice/libdc1394.c | 153 +++-------------------------------------
 2 files changed, 9 insertions(+), 156 deletions(-)

diff --git a/configure b/configure
index c54706ab98..6e89473e2b 100755
--- a/configure
+++ b/configure
@@ -1076,8 +1076,6 @@ HAVE_LIST="
     isatty
     kbhit
     ldbrx
-    libdc1394_1
-    libdc1394_2
     llrint
     llrintf
     local_aligned_16
@@ -2911,6 +2909,7 @@ check_mathfunc truncf
 enabled avisynth   && require2 vfw32 "windows.h vfw.h" AVIFileInit -lavifil32
 enabled libcelt    && require libcelt celt/celt.h celt_decode -lcelt0
 enabled frei0r     && { check_header frei0r.h || die "ERROR: frei0r.h header not found"; }
+enabled libdc1394  && require_pkg_config libdc1394-2 dc1394/dc1394.h dc1394_new
 enabled libdirac   && require_pkg_config dirac                          \
     "libdirac_decoder/dirac_parser.h libdirac_encoder/dirac_encoder.h"  \
     "dirac_decoder_init dirac_encoder_init"
@@ -2942,15 +2941,6 @@ enabled libxavs    && require  libxavs xavs.h xavs_encoder_encode -lxavs
 enabled libxvid    && require  libxvid xvid.h xvid_global -lxvidcore
 enabled mlib       && require  mediaLib mlib_types.h mlib_VectorSub_S16_U8_Mod -lmlib
 
-# libdc1394 check
-if enabled libdc1394; then
-    { check_lib dc1394/dc1394.h dc1394_new -ldc1394 -lraw1394 &&
-        enable libdc1394_2; } ||
-    { check_lib libdc1394/dc1394_control.h dc1394_create_handle -ldc1394_control -lraw1394 &&
-        enable libdc1394_1; } ||
-    die "ERROR: No version of libdc1394 found "
-fi
-
 SDL_CONFIG="${cross_prefix}sdl-config"
 if check_pkg_config sdl SDL_version.h SDL_Linked_Version; then
     check_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) >= 0x010201" $sdl_cflags &&
diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index cf9e03233e..299ae9336b 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -25,38 +25,15 @@
 #include "libavutil/opt.h"
 #include "avdevice.h"
 
-#if HAVE_LIBDC1394_2
 #include <dc1394/dc1394.h>
-#elif HAVE_LIBDC1394_1
-#include <libraw1394/raw1394.h>
-#include <libdc1394/dc1394_control.h>
-
-#define DC1394_VIDEO_MODE_320x240_YUV422 MODE_320x240_YUV422
-#define DC1394_VIDEO_MODE_640x480_YUV411 MODE_640x480_YUV411
-#define DC1394_VIDEO_MODE_640x480_YUV422 MODE_640x480_YUV422
-#define DC1394_FRAMERATE_1_875 FRAMERATE_1_875
-#define DC1394_FRAMERATE_3_75  FRAMERATE_3_75
-#define DC1394_FRAMERATE_7_5   FRAMERATE_7_5
-#define DC1394_FRAMERATE_15    FRAMERATE_15
-#define DC1394_FRAMERATE_30    FRAMERATE_30
-#define DC1394_FRAMERATE_60    FRAMERATE_60
-#define DC1394_FRAMERATE_120   FRAMERATE_120
-#define DC1394_FRAMERATE_240   FRAMERATE_240
-#endif
 
 #undef free
 
 typedef struct dc1394_data {
     AVClass *class;
-#if HAVE_LIBDC1394_1
-    raw1394handle_t handle;
-    dc1394_cameracapture camera;
-    int channel;
-#elif HAVE_LIBDC1394_2
     dc1394_t *d;
     dc1394camera_t *camera;
     dc1394video_frame_t *frame;
-#endif
     int current_frame;
     int fps;
 
@@ -161,107 +138,7 @@ out:
     return -1;
 }
 
-#if HAVE_LIBDC1394_1
-static int dc1394_v1_read_header(AVFormatContext *c, AVFormatParameters * ap)
-{
-    dc1394_data* dc1394 = c->priv_data;
-    AVStream* vst;
-    nodeid_t* camera_nodes;
-    int res;
-    struct dc1394_frame_format *fmt = NULL;
-    struct dc1394_frame_rate *fps = NULL;
-
-    if (dc1394_read_common(c,ap,&fmt,&fps) != 0)
-        return -1;
-
-#if FF_API_FORMAT_PARAMETERS
-    if (ap->channel)
-        dc1394->channel = ap->channel;
-#endif
-
-    /* Now let us prep the hardware. */
-    dc1394->handle = dc1394_create_handle(0); /* FIXME: gotta have ap->port */
-    if (!dc1394->handle) {
-        av_log(c, AV_LOG_ERROR, "Can't acquire dc1394 handle on port %d\n", 0 /* ap->port */);
-        goto out;
-    }
-    camera_nodes = dc1394_get_camera_nodes(dc1394->handle, &res, 1);
-    if (!camera_nodes || camera_nodes[dc1394->channel] == DC1394_NO_CAMERA) {
-        av_log(c, AV_LOG_ERROR, "There's no IIDC camera on the channel %d\n", dc1394->channel);
-        goto out_handle;
-    }
-    res = dc1394_dma_setup_capture(dc1394->handle, camera_nodes[dc1394->channel],
-                                   0,
-                                   FORMAT_VGA_NONCOMPRESSED,
-                                   fmt->frame_size_id,
-                                   SPEED_400,
-                                   fps->frame_rate_id, 8, 1,
-                                   c->filename,
-                                   &dc1394->camera);
-    dc1394_free_camera_nodes(camera_nodes);
-    if (res != DC1394_SUCCESS) {
-        av_log(c, AV_LOG_ERROR, "Can't prepare camera for the DMA capture\n");
-        goto out_handle;
-    }
-
-    res = dc1394_start_iso_transmission(dc1394->handle, dc1394->camera.node);
-    if (res != DC1394_SUCCESS) {
-        av_log(c, AV_LOG_ERROR, "Can't start isochronous transmission\n");
-        goto out_handle_dma;
-    }
-
-    return 0;
-
-out_handle_dma:
-    dc1394_dma_unlisten(dc1394->handle, &dc1394->camera);
-    dc1394_dma_release_camera(dc1394->handle, &dc1394->camera);
-out_handle:
-    dc1394_destroy_handle(dc1394->handle);
-out:
-    return -1;
-}
-
-static int dc1394_v1_read_packet(AVFormatContext *c, AVPacket *pkt)
-{
-    struct dc1394_data *dc1394 = c->priv_data;
-    int res;
-
-    /* discard stale frame */
-    if (dc1394->current_frame++) {
-        if (dc1394_dma_done_with_buffer(&dc1394->camera) != DC1394_SUCCESS)
-            av_log(c, AV_LOG_ERROR, "failed to release %d frame\n", dc1394->current_frame);
-    }
-
-    res = dc1394_dma_single_capture(&dc1394->camera);
-
-    if (res == DC1394_SUCCESS) {
-        dc1394->packet.data = (uint8_t *)(dc1394->camera.capture_buffer);
-        dc1394->packet.pts = (dc1394->current_frame * 1000000) / dc1394->fps;
-        res = dc1394->packet.size;
-    } else {
-        av_log(c, AV_LOG_ERROR, "DMA capture failed\n");
-        dc1394->packet.data = NULL;
-        res = -1;
-    }
-
-    *pkt = dc1394->packet;
-    return res;
-}
-
-static int dc1394_v1_close(AVFormatContext * context)
-{
-    struct dc1394_data *dc1394 = context->priv_data;
-
-    dc1394_stop_iso_transmission(dc1394->handle, dc1394->camera.node);
-    dc1394_dma_unlisten(dc1394->handle, &dc1394->camera);
-    dc1394_dma_release_camera(dc1394->handle, &dc1394->camera);
-    dc1394_destroy_handle(dc1394->handle);
-
-    return 0;
-}
-
-#elif HAVE_LIBDC1394_2
-static int dc1394_v2_read_header(AVFormatContext *c, AVFormatParameters * ap)
+static int dc1394_read_header(AVFormatContext *c, AVFormatParameters * ap)
 {
     dc1394_data* dc1394 = c->priv_data;
     dc1394camera_list_t *list;
@@ -334,7 +211,7 @@ out:
     return -1;
 }
 
-static int dc1394_v2_read_packet(AVFormatContext *c, AVPacket *pkt)
+static int dc1394_read_packet(AVFormatContext *c, AVPacket *pkt)
 {
     struct dc1394_data *dc1394 = c->priv_data;
     int res;
@@ -360,7 +237,7 @@ static int dc1394_v2_read_packet(AVFormatContext *c, AVPacket *pkt)
     return res;
 }
 
-static int dc1394_v2_close(AVFormatContext * context)
+static int dc1394_close(AVFormatContext * context)
 {
     struct dc1394_data *dc1394 = context->priv_data;
 
@@ -374,25 +251,11 @@ static int dc1394_v2_close(AVFormatContext * context)
 
 AVInputFormat ff_libdc1394_demuxer = {
     .name           = "libdc1394",
-    .long_name      = NULL_IF_CONFIG_SMALL("dc1394 v.2 A/V grab"),
+    .long_name      = NULL_IF_CONFIG_SMALL("dc1394 A/V grab"),
     .priv_data_size = sizeof(struct dc1394_data),
-    .read_header    = dc1394_v2_read_header,
-    .read_packet    = dc1394_v2_read_packet,
-    .read_close     = dc1394_v2_close,
-    .flags          = AVFMT_NOFILE,
+    .read_header    = dc1394_read_header,
+    .read_packet    = dc1394_read_packet,
+    .read_close     = dc1394_close,
+    .flags          = AVFMT_NOFILE
     .priv_class     = &libdc1394_class,
 };
-
-#endif
-#if HAVE_LIBDC1394_1
-AVInputFormat ff_libdc1394_demuxer = {
-    .name           = "libdc1394",
-    .long_name      = NULL_IF_CONFIG_SMALL("dc1394 v.1 A/V grab"),
-    .priv_data_size = sizeof(struct dc1394_data),
-    .read_header    = dc1394_v1_read_header,
-    .read_packet    = dc1394_v1_read_packet,
-    .read_close     = dc1394_v1_close,
-    .flags          = AVFMT_NOFILE,
-    .priv_class     = &libdc1394_class,
-};
-#endif

From b8773e44d56667edea2d68d067d0c156522ca304 Mon Sep 17 00:00:00 2001
From: Martin Lambers <marlam@marlam.de>
Date: Thu, 26 May 2011 08:16:58 +0200
Subject: [PATCH 375/830] libdc1394: choose best video mode and rate based on
 camera capabilities.

---
 libavdevice/libdc1394.c | 242 +++++++++++++++++++++++++++-------------
 1 file changed, 162 insertions(+), 80 deletions(-)

diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index 299ae9336b..f4af08174a 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -2,6 +2,7 @@
  * IIDC1394 grab interface (uses libdc1394 and libraw1394)
  * Copyright (c) 2004 Roman Shaposhnik
  * Copyright (c) 2008 Alessandro Sappia
+ * Copyright (c) 2011 Martin Lambers
  *
  * This file is part of FFmpeg.
  *
@@ -25,6 +26,9 @@
 #include "libavutil/opt.h"
 #include "avdevice.h"
 
+#include <stdlib.h>
+#include <string.h>
+
 #include <dc1394/dc1394.h>
 
 #undef free
@@ -40,16 +44,21 @@ typedef struct dc1394_data {
     AVPacket packet;
 } dc1394_data;
 
-struct dc1394_frame_format {
-    int width;
-    int height;
-    enum PixelFormat pix_fmt;
-    int frame_size_id;
-} dc1394_frame_formats[] = {
-    { 320, 240, PIX_FMT_UYVY422, DC1394_VIDEO_MODE_320x240_YUV422 },
-    { 640, 480, PIX_FMT_UYYVYY411, DC1394_VIDEO_MODE_640x480_YUV411 },
-    { 640, 480, PIX_FMT_UYVY422, DC1394_VIDEO_MODE_640x480_YUV422 },
-    { 0, 0, 0, 0 } /* gotta be the last one */
+/* The list of color codings that we support.
+ * We assume big endian for the dc1394 16bit modes: libdc1394 never sets the
+ * flag little_endian in dc1394video_frame_t. */
+struct dc1394_color_coding {
+    int pix_fmt;
+    int score;
+    uint32_t coding;
+} dc1394_color_codings[] = {
+    { PIX_FMT_GRAY16BE,  1000, DC1394_COLOR_CODING_MONO16 },
+    { PIX_FMT_RGB48BE,   1100, DC1394_COLOR_CODING_RGB16  },
+    { PIX_FMT_GRAY8,     1200, DC1394_COLOR_CODING_MONO8  },
+    { PIX_FMT_RGB24,     1300, DC1394_COLOR_CODING_RGB8   },
+    { PIX_FMT_UYYVYY411, 1400, DC1394_COLOR_CODING_YUV411 },
+    { PIX_FMT_UYVY422,   1500, DC1394_COLOR_CODING_YUV422 },
+    { PIX_FMT_NONE, 0, 0 } /* gotta be the last one */
 };
 
 struct dc1394_frame_rate {
@@ -68,9 +77,6 @@ struct dc1394_frame_rate {
 };
 
 static const AVOption options[] = {
-#if HAVE_LIBDC1394_1
-    { "channel", "", offsetof(dc1394_data, channel), FF_OPT_TYPE_INT, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
-#endif
     { NULL },
 };
 
@@ -81,73 +87,22 @@ static const AVClass libdc1394_class = {
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
-
-static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
-                                     struct dc1394_frame_format **select_fmt, struct dc1394_frame_rate **select_fps)
-{
-    dc1394_data* dc1394 = c->priv_data;
-    AVStream* vst;
-    struct dc1394_frame_format *fmt;
-    struct dc1394_frame_rate *fps;
-    enum PixelFormat pix_fmt = ap->pix_fmt == PIX_FMT_NONE ? PIX_FMT_UYVY422 : ap->pix_fmt; /* defaults */
-    int width                = !ap->width ? 320 : ap->width;
-    int height               = !ap->height ? 240 : ap->height;
-    int frame_rate           = !ap->time_base.num ? 30000 : av_rescale(1000, ap->time_base.den, ap->time_base.num);
-
-    for (fmt = dc1394_frame_formats; fmt->width; fmt++)
-         if (fmt->pix_fmt == pix_fmt && fmt->width == width && fmt->height == height)
-             break;
-
-    for (fps = dc1394_frame_rates; fps->frame_rate; fps++)
-         if (fps->frame_rate == frame_rate)
-             break;
-
-    if (!fps->frame_rate || !fmt->width) {
-        av_log(c, AV_LOG_ERROR, "Can't find matching camera format for %s, %dx%d@%d:1000fps\n", avcodec_get_pix_fmt_name(pix_fmt),
-                                                                                                width, height, frame_rate);
-        goto out;
-    }
-
-    /* create a video stream */
-    vst = av_new_stream(c, 0);
-    if (!vst)
-        goto out;
-    av_set_pts_info(vst, 64, 1, 1000);
-    vst->codec->codec_type = AVMEDIA_TYPE_VIDEO;
-    vst->codec->codec_id = CODEC_ID_RAWVIDEO;
-    vst->codec->time_base.den = fps->frame_rate;
-    vst->codec->time_base.num = 1000;
-    vst->codec->width = fmt->width;
-    vst->codec->height = fmt->height;
-    vst->codec->pix_fmt = fmt->pix_fmt;
-
-    /* packet init */
-    av_init_packet(&dc1394->packet);
-    dc1394->packet.size = avpicture_get_size(fmt->pix_fmt, fmt->width, fmt->height);
-    dc1394->packet.stream_index = vst->index;
-    dc1394->packet.flags |= AV_PKT_FLAG_KEY;
-
-    dc1394->current_frame = 0;
-    dc1394->fps = fps->frame_rate;
-
-    vst->codec->bit_rate = av_rescale(dc1394->packet.size * 8, fps->frame_rate, 1000);
-    *select_fps = fps;
-    *select_fmt = fmt;
-    return 0;
-out:
-    return -1;
-}
-
 static int dc1394_read_header(AVFormatContext *c, AVFormatParameters * ap)
 {
     dc1394_data* dc1394 = c->priv_data;
+    AVStream *vst;
+    const struct dc1394_color_coding *cc;
+    const struct dc1394_frame_rate *fr;
     dc1394camera_list_t *list;
-    int res, i;
-    struct dc1394_frame_format *fmt = NULL;
-    struct dc1394_frame_rate *fps = NULL;
-
-    if (dc1394_read_common(c,ap,&fmt,&fps) != 0)
-       return -1;
+    dc1394video_modes_t video_modes;
+    dc1394video_mode_t video_mode;
+    dc1394framerates_t frame_rates;
+    dc1394framerate_t frame_rate;
+    uint32_t dc1394_width, dc1394_height, dc1394_color_coding;
+    int rate, best_rate;
+    int score, max_score;
+    int final_width, final_height, final_pix_fmt, final_frame_rate;
+    int res, i, j;
 
     /* Now let us prep the hardware. */
     dc1394->d = dc1394_new();
@@ -166,6 +121,133 @@ static int dc1394_read_header(AVFormatContext *c, AVFormatParameters * ap)
     /* Freeing list of cameras */
     dc1394_camera_free_list (list);
 
+    /* Get the list of video modes supported by the camera. */
+    res = dc1394_video_get_supported_modes (dc1394->camera, &video_modes);
+    if (res != DC1394_SUCCESS) {
+        av_log(c, AV_LOG_ERROR, "Could not get video formats.\n");
+        goto out_camera;
+    }
+    /* Choose the best mode. */
+    rate = (ap->time_base.num ? av_rescale(1000, ap->time_base.den, ap->time_base.num) : -1);
+    max_score = -1;
+    for (i = 0; i < video_modes.num; i++) {
+        if (video_modes.modes[i] == DC1394_VIDEO_MODE_EXIF
+                || (video_modes.modes[i] >= DC1394_VIDEO_MODE_FORMAT7_MIN
+                    && video_modes.modes[i] <= DC1394_VIDEO_MODE_FORMAT7_MAX)) {
+            /* These modes are currently not supported as they would require
+             * much more work. For the remaining modes, the functions
+             * dc1394_get_image_size_from_video_mode and
+             * dc1394_get_color_coding_from_video_mode do not need to query the
+             * camera, and thus cannot fail. */
+            continue;
+        }
+        dc1394_get_color_coding_from_video_mode (NULL, video_modes.modes[i],
+                &dc1394_color_coding);
+        for (cc = dc1394_color_codings; cc->pix_fmt != PIX_FMT_NONE; cc++)
+            if (cc->coding == dc1394_color_coding)
+                break;
+        if (cc->pix_fmt == PIX_FMT_NONE) {
+            /* We currently cannot handle this color coding. */
+            continue;
+        }
+        /* Here we know that the mode is supported. Get its frame size and the list
+         * of frame rates supported by the camera for this mode. This list is sorted
+         * in ascending order according to libdc1394 example programs. */
+        dc1394_get_image_size_from_video_mode (NULL, video_modes.modes[i],
+                &dc1394_width, &dc1394_height);
+        res = dc1394_video_get_supported_framerates (dc1394->camera, video_modes.modes[i],
+                &frame_rates);
+        if (res != DC1394_SUCCESS || frame_rates.num == 0) {
+            av_log(c, AV_LOG_ERROR, "Cannot get frame rates for video mode.\n");
+            goto out_camera;
+        }
+        /* Choose the best frame rate. */
+        best_rate = -1;
+        for (j = 0; j < frame_rates.num; j++) {
+            for (fr = dc1394_frame_rates; fr->frame_rate; fr++) {
+                if (fr->frame_rate_id == frame_rates.framerates[j]) {
+                    break;
+                }
+            }
+            if (!fr->frame_rate) {
+                /* This frame rate is not supported. */
+                continue;
+            }
+            best_rate = fr->frame_rate;
+            frame_rate = fr->frame_rate_id;
+            if (ap->time_base.num && rate == fr->frame_rate) {
+                /* This is the requested frame rate. */
+                break;
+            }
+        }
+        if (best_rate == -1) {
+            /* No supported rate found. */
+            continue;
+        }
+        /* Here we know that both the mode and the rate are supported. Compute score. */
+        if (ap->width && ap->height
+                && (dc1394_width == ap->width && dc1394_height == ap->height)) {
+            score = 110000;
+        } else {
+            score = dc1394_width * 10;  // 1600 - 16000
+        }
+        if (ap->pix_fmt == cc->pix_fmt) {
+            score += 90000;
+        } else {
+            score += cc->score;         // 1000 - 1500
+        }
+        if (ap->time_base.num && rate == best_rate) {
+            score += 70000;
+        } else {
+            score += best_rate / 1000;  // 1 - 240
+        }
+        if (score > max_score) {
+            video_mode = video_modes.modes[i];
+            final_width = dc1394_width;
+            final_height = dc1394_height;
+            final_pix_fmt = cc->pix_fmt;
+            final_frame_rate = best_rate;
+            max_score = score;
+        }
+    }
+    if (max_score == -1) {
+        av_log(c, AV_LOG_ERROR, "No suitable video mode / frame rate available.\n");
+        goto out_camera;
+    }
+    if (ap->width && ap->height && !(ap->width == final_width && ap->height == final_height)) {
+        av_log(c, AV_LOG_WARNING, "Requested frame size is not available, using fallback.\n");
+    }
+    if (ap->pix_fmt != PIX_FMT_NONE && ap->pix_fmt != final_pix_fmt) {
+        av_log(c, AV_LOG_WARNING, "Requested pixel format is not supported, using fallback.\n");
+    }
+    if (ap->time_base.num && rate != final_frame_rate) {
+        av_log(c, AV_LOG_WARNING, "Requested frame rate is not available, using fallback.\n");
+    }
+
+    /* create a video stream */
+    vst = av_new_stream(c, 0);
+    if (!vst)
+        goto out_camera;
+    av_set_pts_info(vst, 64, 1, 1000);
+    vst->codec->codec_type = AVMEDIA_TYPE_VIDEO;
+    vst->codec->codec_id = CODEC_ID_RAWVIDEO;
+    vst->codec->time_base.den = final_frame_rate;
+    vst->codec->time_base.num = 1000;
+    vst->codec->width = final_width;
+    vst->codec->height = final_height;
+    vst->codec->pix_fmt = final_pix_fmt;
+
+    /* packet init */
+    av_init_packet(&dc1394->packet);
+    dc1394->packet.size = avpicture_get_size(final_pix_fmt, final_width, final_height);
+    dc1394->packet.stream_index = vst->index;
+    dc1394->packet.flags |= AV_PKT_FLAG_KEY;
+
+    dc1394->current_frame = 0;
+    dc1394->fps = final_frame_rate;
+
+    vst->codec->bit_rate = av_rescale(dc1394->packet.size * 8, final_frame_rate, 1000);
+
     /* Select MAX Speed possible from the cam */
     if (dc1394->camera->bmode_capable>0) {
        dc1394_video_set_operation_mode(dc1394->camera, DC1394_OPERATION_MODE_1394B);
@@ -182,13 +264,13 @@ static int dc1394_read_header(AVFormatContext *c, AVFormatParameters * ap)
         goto out_camera;
     }
 
-    if (dc1394_video_set_mode(dc1394->camera, fmt->frame_size_id) != DC1394_SUCCESS) {
+    if (dc1394_video_set_mode(dc1394->camera, video_mode) != DC1394_SUCCESS) {
         av_log(c, AV_LOG_ERROR, "Couldn't set video format\n");
         goto out_camera;
     }
 
-    if (dc1394_video_set_framerate(dc1394->camera,fps->frame_rate_id) != DC1394_SUCCESS) {
-        av_log(c, AV_LOG_ERROR, "Couldn't set framerate %d \n",fps->frame_rate);
+    if (dc1394_video_set_framerate(dc1394->camera, frame_rate) != DC1394_SUCCESS) {
+        av_log(c, AV_LOG_ERROR, "Could not set framerate %d.\n", final_frame_rate);
         goto out_camera;
     }
     if (dc1394_capture_setup(dc1394->camera, 10, DC1394_CAPTURE_FLAGS_DEFAULT)!=DC1394_SUCCESS) {
@@ -256,6 +338,6 @@ AVInputFormat ff_libdc1394_demuxer = {
     .read_header    = dc1394_read_header,
     .read_packet    = dc1394_read_packet,
     .read_close     = dc1394_close,
-    .flags          = AVFMT_NOFILE
+    .flags          = AVFMT_NOFILE,
     .priv_class     = &libdc1394_class,
 };

From 91e3a25ef6de2efcbea38ec0f6ffd3f8785962a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sat, 21 May 2011 14:57:04 +0300
Subject: [PATCH 376/830] movenc: Add an AVClass for setting muxer specific
 options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/movenc.c | 18 ++++++++++++++++++
 libavformat/movenc.h |  1 +
 2 files changed, 19 insertions(+)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 52c775a565..550b957b68 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -32,10 +32,22 @@
 #include "libavcodec/put_bits.h"
 #include "internal.h"
 #include "libavutil/avstring.h"
+#include "libavutil/opt.h"
 
 #undef NDEBUG
 #include <assert.h>
 
+static const AVOption options[] = {
+    { NULL },
+};
+
+static const AVClass mov_muxer_class = {
+    .class_name = "MOV/3GP/MP4/3G2 muxer",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 //FIXME support 64 bit variant with wide placeholders
 static int64_t updateSize(AVIOContext *pb, int64_t pos)
 {
@@ -2285,6 +2297,7 @@ AVOutputFormat ff_mov_muxer = {
     mov_write_trailer,
     .flags = AVFMT_GLOBALHEADER,
     .codec_tag = (const AVCodecTag* const []){codec_movvideo_tags, codec_movaudio_tags, 0},
+    .priv_class = &mov_muxer_class,
 };
 #endif
 #if CONFIG_TGP_MUXER
@@ -2301,6 +2314,7 @@ AVOutputFormat ff_tgp_muxer = {
     mov_write_trailer,
     .flags = AVFMT_GLOBALHEADER,
     .codec_tag = (const AVCodecTag* const []){codec_3gp_tags, 0},
+    .priv_class = &mov_muxer_class,
 };
 #endif
 #if CONFIG_MP4_MUXER
@@ -2317,6 +2331,7 @@ AVOutputFormat ff_mp4_muxer = {
     mov_write_trailer,
     .flags = AVFMT_GLOBALHEADER,
     .codec_tag = (const AVCodecTag* const []){ff_mp4_obj_type, 0},
+    .priv_class = &mov_muxer_class,
 };
 #endif
 #if CONFIG_PSP_MUXER
@@ -2333,6 +2348,7 @@ AVOutputFormat ff_psp_muxer = {
     mov_write_trailer,
     .flags = AVFMT_GLOBALHEADER,
     .codec_tag = (const AVCodecTag* const []){ff_mp4_obj_type, 0},
+    .priv_class = &mov_muxer_class,
 };
 #endif
 #if CONFIG_TG2_MUXER
@@ -2349,6 +2365,7 @@ AVOutputFormat ff_tg2_muxer = {
     mov_write_trailer,
     .flags = AVFMT_GLOBALHEADER,
     .codec_tag = (const AVCodecTag* const []){codec_3gp_tags, 0},
+    .priv_class = &mov_muxer_class,
 };
 #endif
 #if CONFIG_IPOD_MUXER
@@ -2365,5 +2382,6 @@ AVOutputFormat ff_ipod_muxer = {
     mov_write_trailer,
     .flags = AVFMT_GLOBALHEADER,
     .codec_tag = (const AVCodecTag* const []){codec_ipod_tags, 0},
+    .priv_class = &mov_muxer_class,
 };
 #endif
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index 0cc1eb8dc9..6e2b5b7a99 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -101,6 +101,7 @@ typedef struct MOVIndex {
 } MOVTrack;
 
 typedef struct MOVMuxContext {
+    const AVClass *av_class;
     int     mode;
     int64_t time;
     int     nb_streams;

From 28734ac995ef4ea9be2203144362a585b2296637 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Fri, 20 May 2011 12:27:02 +0300
Subject: [PATCH 377/830] movenc: Deprecate the global RTP hinting flag, use a
 private AVOption instead
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use -movflags rtphint instead of -fflags rtphint.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/avformat.h |  4 +++-
 libavformat/movenc.c   | 12 +++++++++++-
 libavformat/movenc.h   |  4 ++++
 libavformat/options.c  |  4 +++-
 libavformat/version.h  |  3 +++
 5 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 2ea940c186..849a85d1cc 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -723,7 +723,9 @@ typedef struct AVFormatContext {
 #define AVFMT_FLAG_IGNDTS       0x0008 ///< Ignore DTS on frames that contain both DTS & PTS
 #define AVFMT_FLAG_NOFILLIN     0x0010 ///< Do not infer any values from other values, just return what is stored in the container
 #define AVFMT_FLAG_NOPARSE      0x0020 ///< Do not use AVParsers, you also must set AVFMT_FLAG_NOFILLIN as the fillin code works on frames and no parsing -> no frames. Also seeking to frames can not work if parsing to find frame boundaries has been disabled
-#define AVFMT_FLAG_RTP_HINT     0x0040 ///< Add RTP hinting to the output file
+#if FF_API_FLAG_RTP_HINT
+#define AVFMT_FLAG_RTP_HINT     0x0040 ///< Deprecated, use the -movflags rtphint muxer specific AVOption instead
+#endif
 
     int loop_input;
 
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 550b957b68..2b6539d140 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -38,6 +38,8 @@
 #include <assert.h>
 
 static const AVOption options[] = {
+    { "movflags", "MOV muxer flags", offsetof(MOVMuxContext, flags), FF_OPT_TYPE_FLAGS, {.dbl = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
+    { "rtphint", "Add RTP hint tracks", 0, FF_OPT_TYPE_CONST, {.dbl = FF_MOV_FLAG_RTP_HINT}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { NULL },
 };
 
@@ -2124,7 +2126,15 @@ static int mov_write_header(AVFormatContext *s)
     if (mov->mode & (MODE_MOV|MODE_IPOD) && s->nb_chapters)
         mov->chapter_track = mov->nb_streams++;
 
+#if FF_API_FLAG_RTP_HINT
     if (s->flags & AVFMT_FLAG_RTP_HINT) {
+        av_log(s, AV_LOG_WARNING, "The RTP_HINT flag is deprecated, enable it "
+                                  "via the -movflags rtphint muxer option "
+                                  "instead.\n");
+        mov->flags |= FF_MOV_FLAG_RTP_HINT;
+    }
+#endif
+    if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
         /* Add hint tracks for each audio and video stream */
         hint_track = mov->nb_streams;
         for (i = 0; i < s->nb_streams; i++) {
@@ -2220,7 +2230,7 @@ static int mov_write_header(AVFormatContext *s)
     if (mov->chapter_track)
         mov_create_chapter_track(s, mov->chapter_track);
 
-    if (s->flags & AVFMT_FLAG_RTP_HINT) {
+    if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
         /* Initialize the hint tracks for each audio and video stream */
         for (i = 0; i < s->nb_streams; i++) {
             AVStream *st = s->streams[i];
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index 6e2b5b7a99..69b6621711 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -109,8 +109,12 @@ typedef struct MOVMuxContext {
     int64_t mdat_pos;
     uint64_t mdat_size;
     MOVTrack *tracks;
+
+    int flags;
 } MOVMuxContext;
 
+#define FF_MOV_FLAG_RTP_HINT 1
+
 int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt);
 
 int ff_mov_init_hinting(AVFormatContext *s, int index, int src_index);
diff --git a/libavformat/options.c b/libavformat/options.c
index 377ba93b44..6ffd1a7435 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -49,7 +49,9 @@ static const AVOption options[]={
 {"nofillin", "do not fill in missing values that can be exactly calculated", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_NOFILLIN }, INT_MIN, INT_MAX, D, "fflags"},
 {"noparse", "disable AVParsers, this needs nofillin too", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_NOPARSE }, INT_MIN, INT_MAX, D, "fflags"},
 {"igndts", "ignore dts", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_IGNDTS }, INT_MIN, INT_MAX, D, "fflags"},
-{"rtphint", "add rtp hinting", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_RTP_HINT }, INT_MIN, INT_MAX, E, "fflags"},
+#if FF_API_FLAG_RTP_HINT
+{"rtphint", "add rtp hinting (deprecated, use the -movflags rtphint option instead)", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_RTP_HINT }, INT_MIN, INT_MAX, E, "fflags"},
+#endif
 {"analyzeduration", "how many microseconds are analyzed to estimate duration", OFFSET(max_analyze_duration), FF_OPT_TYPE_INT, {.dbl = 5*AV_TIME_BASE }, 0, INT_MAX, D},
 {"cryptokey", "decryption key", OFFSET(key), FF_OPT_TYPE_BINARY, {.dbl = 0}, 0, 0, D},
 {"indexmem", "max memory used for timestamp index (per stream)", OFFSET(max_index_size), FF_OPT_TYPE_INT, {.dbl = 1<<20 }, 0, INT_MAX, D},
diff --git a/libavformat/version.h b/libavformat/version.h
index b2ac6d8181..0b53005a6f 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -71,5 +71,8 @@
 #ifndef FF_API_FORMAT_PARAMETERS
 #define FF_API_FORMAT_PARAMETERS       (LIBAVFORMAT_VERSION_MAJOR < 54)
 #endif
+#ifndef FF_API_FLAG_RTP_HINT
+#define FF_API_FLAG_RTP_HINT           (LIBAVFORMAT_VERSION_MAJOR < 54)
+#endif
 
 #endif /* AVFORMAT_VERSION_H */

From f327bfa6dcdbce4593213c30a328d8aaf7a4b86b Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 27 May 2011 11:36:43 -0400
Subject: [PATCH 378/830] swscale: fix build with --disable-swscale-alpha.

---
 libswscale/x86/yuv2rgb_mmx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c
index 23d4c42700..439482bb71 100644
--- a/libswscale/x86/yuv2rgb_mmx.c
+++ b/libswscale/x86/yuv2rgb_mmx.c
@@ -82,15 +82,15 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
     if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
         switch (c->dstFormat) {
             case PIX_FMT_RGB32:
-                if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
-#if HAVE_7REGS
+                if (c->srcFormat == PIX_FMT_YUVA420P) {
+#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
                     return yuva420_rgb32_MMX;
 #endif
                     break;
                 } else return yuv420_rgb32_MMX;
             case PIX_FMT_BGR32:
-                if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
-#if HAVE_7REGS
+                if (c->srcFormat == PIX_FMT_YUVA420P) {
+#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
                     return yuva420_bgr32_MMX;
 #endif
                     break;

From 389e2000ebc299b9da24f7e5faf9a68a88f9ee7c Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 27 May 2011 12:23:32 -0400
Subject: [PATCH 379/830] swscale: delay allocation of formatConvBuffer.

This means the buffer is not allocated when it is not needed. It also
fixes memory leaks detected by valgrind/FATE.
---
 libswscale/utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/utils.c b/libswscale/utils.c
index 15ed9d1ce6..571f45be3d 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -791,7 +791,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
                srcW, srcH, dstW, dstH);
         return AVERROR(EINVAL);
     }
-    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2, fail);
 
     if (!dstFilter) dstFilter= &dummyFilter;
     if (!srcFilter) srcFilter= &dummyFilter;
@@ -847,6 +846,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         }
     }
 
+    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2, fail);
     if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
         c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
         if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {

From faf8d3ddfa105d7f107a886c5f3cadf6357d89b6 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 27 May 2011 11:57:39 -0400
Subject: [PATCH 380/830] cmdutils: use sws_freeContext() instead of
 av_freep().

av_freep(swsContext) frees only the context itself and leaks any memory
allocated within the swsContext.
---
 cmdutils.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cmdutils.c b/cmdutils.c
index feeea694d8..778f0fb3cf 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -76,7 +76,8 @@ void uninit_opts(void)
     av_freep(&avformat_opts->key);
     av_freep(&avformat_opts);
 #if CONFIG_SWSCALE
-    av_freep(&sws_opts);
+    sws_freeContext(sws_opts);
+    sws_opts = NULL;
 #endif
     for (i = 0; i < opt_name_count; i++) {
         //opt_values are only stored for codec-specific options in which case

From 5e528cffcf3d2cb1665075649601f4e62c8764e1 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Fri, 27 May 2011 21:18:12 +0200
Subject: [PATCH 381/830] x86: Add appropriate ifdefs around certain AVX
 functions.

nasm versions prior to 2.09 have trouble assembling some of our AVX code.
Protect these sections by preprocessor macros to allow compilation to pass.
---
 libavcodec/x86/fft_mmx.asm | 8 ++++++++
 libavcodec/x86/fft_sse.c   | 3 +++
 2 files changed, 11 insertions(+)

diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index bd2e8297e7..27276a1a31 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -299,6 +299,7 @@ IF%1 mova  Z(1), m5
 
 INIT_YMM
 
+%ifdef HAVE_AVX
 align 16
 fft8_avx:
     mova      m0, Z(0)
@@ -387,6 +388,7 @@ fft32_interleave_avx:
     sub r2d, mmsize/4
     jg .deint_loop
     ret
+%endif
 
 INIT_XMM
 %define movdqa  movaps
@@ -532,6 +534,7 @@ DEFINE_ARGS z, w, n, o1, o3
 
 INIT_YMM
 
+%ifdef HAVE_AVX
 %macro INTERL_AVX 5
     vunpckhps      %3, %2, %1
     vunpcklps      %2, %2, %1
@@ -545,6 +548,7 @@ INIT_YMM
 
 DECL_PASS pass_avx, PASS_BIG 1
 DECL_PASS pass_interleave_avx, PASS_BIG 0
+%endif
 
 INIT_XMM
 
@@ -634,8 +638,10 @@ cglobal fft_dispatch%3%2, 2,5,8, z, nbits
     RET
 %endmacro ; DECL_FFT
 
+%ifdef HAVE_AVX
 DECL_FFT 6, _avx
 DECL_FFT 6, _avx, _interleave
+%endif
 DECL_FFT 5, _sse
 DECL_FFT 5, _sse, _interleave
 DECL_FFT 4, _3dn
@@ -847,4 +853,6 @@ DECL_IMDCT _sse, POSROTATESHUF
 
 INIT_YMM
 
+%ifdef HAVE_AVX
 DECL_IMDCT _avx, POSROTATESHUF_AVX
+%endif
diff --git a/libavcodec/x86/fft_sse.c b/libavcodec/x86/fft_sse.c
index 5b52988d09..add20dd5b2 100644
--- a/libavcodec/x86/fft_sse.c
+++ b/libavcodec/x86/fft_sse.c
@@ -22,6 +22,7 @@
 #include "libavutil/x86_cpu.h"
 #include "libavcodec/dsputil.h"
 #include "fft.h"
+#include "config.h"
 
 DECLARE_ASM_CONST(16, int, ff_m1m1m1m1)[4] =
     { 1 << 31, 1 << 31, 1 << 31, 1 << 31 };
@@ -30,10 +31,12 @@ void ff_fft_dispatch_sse(FFTComplex *z, int nbits);
 void ff_fft_dispatch_interleave_sse(FFTComplex *z, int nbits);
 void ff_fft_dispatch_interleave_avx(FFTComplex *z, int nbits);
 
+#if HAVE_AVX
 void ff_fft_calc_avx(FFTContext *s, FFTComplex *z)
 {
     ff_fft_dispatch_interleave_avx(z, s->nbits);
 }
+#endif
 
 void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
 {

From 7eae0110022063d0b3eb423c46dea5eff5268797 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 26 May 2011 14:39:37 +0200
Subject: [PATCH 382/830] doc: Drop DJGPP section, Libav now compiles
 out-of-the-box on FreeDOS.

---
 doc/general.texi | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/doc/general.texi b/doc/general.texi
index 94fdb1dec4..23a789572d 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -754,12 +754,6 @@ performance on systems without hardware floating point support).
 
 Using a cross-compiler is preferred for various reasons.
 
-@subsection DJGPP
-
-Libav cannot be compiled because of broken system headers, add
-@code{--extra-cflags=-U__STRICT_ANSI__} to the configure options as a
-workaround.
-
 @section OS/2
 
 For information about compiling Libav on OS/2 see

From 5a5a0f161359ca6c3fd03eac88a57bd026b8bc1d Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Fri, 27 May 2011 19:46:39 +0200
Subject: [PATCH 383/830] swscale: Remove unused variables in x86 code.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

libswscale/x86/swscale_template.c:2072: warning: unused variable ‘canMMX2BeUsed’
libswscale/x86/swscale_template.c:2145: warning: unused variable ‘canMMX2BeUsed’
libswscale/x86/swscale_template.c:2209: warning: unused variable ‘chrVPixBuf’
libswscale/x86/swscale_template.c:2237: warning: unused variable ‘chrVSrcPtr’
---
 libswscale/x86/swscale_template.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index a08ff6a0fb..5cc3a435be 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2071,7 +2071,6 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
 {
     int32_t *filterPos = c->hLumFilterPos;
     int16_t *filter    = c->hLumFilter;
-    int     canMMX2BeUsed  = c->canMMX2BeUsed;
     void    *mmx2FilterCode= c->lumMmx2FilterCode;
     int i;
 #if defined(PIC)
@@ -2144,7 +2143,6 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d
 {
     int32_t *filterPos = c->hChrFilterPos;
     int16_t *filter    = c->hChrFilter;
-    int     canMMX2BeUsed  = c->canMMX2BeUsed;
     void    *mmx2FilterCode= c->chrMmx2FilterCode;
     int i;
 #if defined(PIC)
@@ -2210,7 +2208,6 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
     const int flags= c->flags;
     int16_t **lumPixBuf= c->lumPixBuf;
     int16_t **chrUPixBuf= c->chrUPixBuf;
-    int16_t **chrVPixBuf= c->chrVPixBuf;
     int16_t **alpPixBuf= c->alpPixBuf;
     const int vLumBufSize= c->vLumBufSize;
     const int vChrBufSize= c->vChrBufSize;
@@ -2236,7 +2233,6 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
     if (dstY < dstH - 2) {
         const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
         const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-        const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
         const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
         int i;
         if (flags & SWS_ACCURATE_RND) {

From 93eb8e4d7c6654040fd160396546c547cdf51043 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 27 May 2011 20:12:01 +0100
Subject: [PATCH 384/830] ARM: check for VFPv3

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 configure | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/configure b/configure
index 4916bad18a..46593882df 100755
--- a/configure
+++ b/configure
@@ -1020,6 +1020,7 @@ ARCH_EXT_LIST='
     ppc4xx
     sse
     ssse3
+    vfpv3
     vis
 '
 
@@ -1205,6 +1206,7 @@ armv6t2_deps="arm"
 armvfp_deps="arm"
 iwmmxt_deps="arm"
 neon_deps="arm"
+vfpv3_deps="armvfp"
 
 mmi_deps="mips"
 
@@ -2628,6 +2630,7 @@ EOF
     enabled armvfp  && check_asm armvfp  '"fadds s0, s0, s0"'
     enabled iwmmxt  && check_asm iwmmxt  '"wunpckelub wr6, wr4"'
     enabled neon    && check_asm neon    '"vadd.i16 q0, q0, q0"'
+    enabled vfpv3   && check_asm vfpv3   '"vmov.f32 s0, #1.0"'
 
     enabled_all armv6t2 shared !pic && enable_pic
 

From 79aeade6f6f8fbd7ce1da619fdd475e5db88ae24 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 27 May 2011 20:18:26 +0100
Subject: [PATCH 385/830] ARM: disable ff_vector_fmul_vfp on VFPv3 systems

This function uses old-style vector operations deprecated in VFPv3.
Some implementations, e.g. Cortex-A9, support them only through
slow software emulation.  Cortex-A8 does have this functionality
in hardware, but as it also has NEON, this function is not used
there regardless.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/dsputil_init_vfp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/arm/dsputil_init_vfp.c b/libavcodec/arm/dsputil_init_vfp.c
index 39289116ef..9cda890411 100644
--- a/libavcodec/arm/dsputil_init_vfp.c
+++ b/libavcodec/arm/dsputil_init_vfp.c
@@ -28,6 +28,7 @@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
 
 void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx)
 {
-    c->vector_fmul = ff_vector_fmul_vfp;
+    if (!HAVE_VFPV3)
+        c->vector_fmul = ff_vector_fmul_vfp;
     c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp;
 }

From b1bf8c7887395b08a68072c80f11b0c53d8e785a Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 24 May 2011 15:14:36 -0400
Subject: [PATCH 386/830] ac3enc: Move AC-3 AVOptions array to a separate file
 to make it easier to use only selected options for the different AC-3 encoder
 types.

---
 libavcodec/ac3enc.c               | 61 ++++---------------------
 libavcodec/ac3enc_opts_template.c | 75 +++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 52 deletions(-)
 create mode 100644 libavcodec/ac3enc_opts_template.c

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index ec3ffb30e4..78e97b522f 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -247,63 +247,20 @@ static const float extmixlev_options[EXTMIXLEV_NUM_OPTIONS] = {
 #define OFFSET(param) offsetof(AC3EncodeContext, options.param)
 #define AC3ENC_PARAM (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 
-static const AVOption options[] = {
-/* Metadata Options */
-{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
-/* downmix levels */
-{"center_mixlev", "Center Mix Level", OFFSET(center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_4POINT5DB }, 0.0, 1.0, AC3ENC_PARAM},
-{"surround_mixlev", "Surround Mix Level", OFFSET(surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_6DB }, 0.0, 1.0, AC3ENC_PARAM},
-/* audio production information */
-{"mixing_level", "Mixing Level", OFFSET(mixing_level), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 111, AC3ENC_PARAM},
-{"room_type", "Room Type", OFFSET(room_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "room_type"},
-    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
-    {"large",        "Large Room",              0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
-    {"small",        "Small Room",              0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
-/* other metadata options */
-{"copyright", "Copyright Bit", OFFSET(copyright), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
-{"dialnorm", "Dialogue Level (dB)", OFFSET(dialogue_level), FF_OPT_TYPE_INT, {.dbl = -31 }, -31, -1, AC3ENC_PARAM},
-{"dsur_mode", "Dolby Surround Mode", OFFSET(dolby_surround_mode), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 2, AC3ENC_PARAM, "dsur_mode"},
-    {"notindicated", "Not Indicated (default)",    0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
-    {"on",           "Dolby Surround Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
-    {"off",          "Not Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
-{"original", "Original Bit Stream", OFFSET(original), FF_OPT_TYPE_INT,   {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
-/* extended bitstream information */
-{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dmix_mode"},
-    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
-    {"ltrt", "Lt/Rt Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
-    {"loro", "Lo/Ro Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
-{"ltrt_cmixlev", "Lt/Rt Center Mix Level", OFFSET(ltrt_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
-{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
-{"loro_cmixlev", "Lo/Ro Center Mix Level", OFFSET(loro_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
-{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
-{"dsurex_mode", "Dolby Surround EX Mode", OFFSET(dolby_surround_ex_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dsurex_mode"},
-    {"notindicated", "Not Indicated (default)",       0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
-    {"on",           "Dolby Surround EX Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
-    {"off",          "Not Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
-{"dheadphone_mode", "Dolby Headphone Mode", OFFSET(dolby_headphone_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dheadphone_mode"},
-    {"notindicated", "Not Indicated (default)",     0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
-    {"on",           "Dolby Headphone Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
-    {"off",          "Not Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
-{"ad_conv_type", "A/D Converter Type", OFFSET(ad_converter_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 1, AC3ENC_PARAM, "ad_conv_type"},
-    {"standard", "Standard (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
-    {"hdcd",     "HDCD",               0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
-/* Other Encoding Options */
-{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
-#if CONFIG_AC3ENC_FLOAT
-{"channel_coupling",   "Channel Coupling",   OFFSET(channel_coupling),   FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM, "channel_coupling"},
-    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "channel_coupling"},
-{"cpl_start_band", "Coupling Start Band", OFFSET(cpl_start), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 15, AC3ENC_PARAM, "cpl_start_band"},
-    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "cpl_start_band"},
-#endif
-{NULL}
-};
+
+#define AC3ENC_TYPE_AC3_FIXED   0
+#define AC3ENC_TYPE_AC3         1
 
 #if CONFIG_AC3ENC_FLOAT
+#define AC3ENC_TYPE AC3ENC_TYPE_AC3
+#include "ac3enc_opts_template.c"
 static AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name,
-                                options, LIBAVUTIL_VERSION_INT };
+                                ac3_options, LIBAVUTIL_VERSION_INT };
 #else
+#define AC3ENC_TYPE AC3ENC_TYPE_AC3_FIXED
+#include "ac3enc_opts_template.c"
 static AVClass ac3enc_class = { "Fixed-Point AC-3 Encoder", av_default_item_name,
-                                options, LIBAVUTIL_VERSION_INT };
+                                ac3fixed_options, LIBAVUTIL_VERSION_INT };
 #endif
 
 
diff --git a/libavcodec/ac3enc_opts_template.c b/libavcodec/ac3enc_opts_template.c
new file mode 100644
index 0000000000..e9c15f2758
--- /dev/null
+++ b/libavcodec/ac3enc_opts_template.c
@@ -0,0 +1,75 @@
+/*
+ * AC-3 encoder options
+ * Copyright (c) 2010 Justin Ruggles <justin.ruggles@gmail.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if AC3ENC_TYPE == AC3ENC_TYPE_AC3_FIXED
+static const AVOption ac3fixed_options[] = {
+#elif AC3ENC_TYPE == AC3ENC_TYPE_AC3
+static const AVOption ac3_options[] = {
+#endif
+/* Metadata Options */
+{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
+/* downmix levels */
+{"center_mixlev", "Center Mix Level", OFFSET(center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_4POINT5DB }, 0.0, 1.0, AC3ENC_PARAM},
+{"surround_mixlev", "Surround Mix Level", OFFSET(surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_6DB }, 0.0, 1.0, AC3ENC_PARAM},
+/* audio production information */
+{"mixing_level", "Mixing Level", OFFSET(mixing_level), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 111, AC3ENC_PARAM},
+{"room_type", "Room Type", OFFSET(room_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "room_type"},
+    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+    {"large",        "Large Room",              0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+    {"small",        "Small Room",              0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+/* other metadata options */
+{"copyright", "Copyright Bit", OFFSET(copyright), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
+{"dialnorm", "Dialogue Level (dB)", OFFSET(dialogue_level), FF_OPT_TYPE_INT, {.dbl = -31 }, -31, -1, AC3ENC_PARAM},
+{"dsur_mode", "Dolby Surround Mode", OFFSET(dolby_surround_mode), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 2, AC3ENC_PARAM, "dsur_mode"},
+    {"notindicated", "Not Indicated (default)",    0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+    {"on",           "Dolby Surround Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+    {"off",          "Not Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+{"original", "Original Bit Stream", OFFSET(original), FF_OPT_TYPE_INT,   {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
+/* extended bitstream information */
+{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dmix_mode"},
+    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+    {"ltrt", "Lt/Rt Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+    {"loro", "Lo/Ro Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+{"ltrt_cmixlev", "Lt/Rt Center Mix Level", OFFSET(ltrt_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"loro_cmixlev", "Lo/Ro Center Mix Level", OFFSET(loro_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"dsurex_mode", "Dolby Surround EX Mode", OFFSET(dolby_surround_ex_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dsurex_mode"},
+    {"notindicated", "Not Indicated (default)",       0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+    {"on",           "Dolby Surround EX Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+    {"off",          "Not Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+{"dheadphone_mode", "Dolby Headphone Mode", OFFSET(dolby_headphone_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dheadphone_mode"},
+    {"notindicated", "Not Indicated (default)",     0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+    {"on",           "Dolby Headphone Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+    {"off",          "Not Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+{"ad_conv_type", "A/D Converter Type", OFFSET(ad_converter_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 1, AC3ENC_PARAM, "ad_conv_type"},
+    {"standard", "Standard (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
+    {"hdcd",     "HDCD",               0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
+/* Other Encoding Options */
+{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
+#if AC3ENC_TYPE != AC3ENC_TYPE_AC3_FIXED
+{"channel_coupling",   "Channel Coupling",   OFFSET(channel_coupling),   FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM, "channel_coupling"},
+    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "channel_coupling"},
+{"cpl_start_band", "Coupling Start Band", OFFSET(cpl_start), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 15, AC3ENC_PARAM, "cpl_start_band"},
+    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "cpl_start_band"},
+#endif
+{NULL}
+};

From aa47c35dff263f19f64f10a93ea9e4da4e395ede Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 24 May 2011 15:20:56 -0400
Subject: [PATCH 387/830] ac3enc: add support for E-AC-3 encoding.

This adds basic stream format support and allows for arbitrary bit rates
rather than just those supported in AC-3.
---
 Changelog                         |   2 +
 configure                         |   1 +
 doc/general.texi                  |   2 +-
 libavcodec/Makefile               |   1 +
 libavcodec/ac3enc.c               | 300 +++++++++++++++++++++++++++---
 libavcodec/ac3enc_float.c         |  18 ++
 libavcodec/ac3enc_opts_template.c |   6 +
 libavcodec/allcodecs.c            |   2 +-
 libavcodec/version.h              |   2 +-
 9 files changed, 309 insertions(+), 25 deletions(-)

diff --git a/Changelog b/Changelog
index b05755503c..da948c6d73 100644
--- a/Changelog
+++ b/Changelog
@@ -4,6 +4,8 @@ releases are sorted from youngest to oldest.
 
 version <next>:
 
+- E-AC-3 audio encoder
+
 
 version 0.7_beta2:
 
diff --git a/configure b/configure
index 46593882df..b535920cb2 100755
--- a/configure
+++ b/configure
@@ -1261,6 +1261,7 @@ dca_decoder_select="mdct"
 dnxhd_encoder_select="aandct"
 dxa_decoder_select="zlib"
 eac3_decoder_select="ac3_decoder"
+eac3_encoder_select="mdct ac3dsp"
 eamad_decoder_select="aandct"
 eatgq_decoder_select="aandct"
 eatqi_decoder_select="aandct"
diff --git a/doc/general.texi b/doc/general.texi
index 23a789572d..775bf5d03f 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -614,7 +614,7 @@ following image formats are supported:
     @tab Used in Origin's Wing Commander IV AVI files.
 @item DSP Group TrueSpeech   @tab     @tab  X
 @item DV audio               @tab     @tab  X
-@item Enhanced AC-3          @tab     @tab  X
+@item Enhanced AC-3          @tab  X  @tab  X
 @item FLAC (Free Lossless Audio Codec)  @tab  X  @tab  IX
 @item GSM                    @tab  E  @tab  X
     @tab encoding supported through external library libgsm
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 81981d76b6..ca5839f2af 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -124,6 +124,7 @@ OBJS-$(CONFIG_DVVIDEO_DECODER)         += dv.o dvdata.o
 OBJS-$(CONFIG_DVVIDEO_ENCODER)         += dv.o dvdata.o
 OBJS-$(CONFIG_DXA_DECODER)             += dxa.o
 OBJS-$(CONFIG_EAC3_DECODER)            += eac3dec.o eac3dec_data.o
+OBJS-$(CONFIG_EAC3_ENCODER)            += ac3enc_float.o ac3tab.o ac3.o kbdwin.o
 OBJS-$(CONFIG_EACMV_DECODER)           += eacmv.o
 OBJS-$(CONFIG_EAMAD_DECODER)           += eamad.o eaidct.o mpeg12.o \
                                           mpeg12data.o mpegvideo.o  \
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 78e97b522f..07ceda0446 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -147,6 +147,7 @@ typedef struct AC3EncodeContext {
 
     AC3Block blocks[AC3_MAX_BLOCKS];        ///< per-block info
 
+    int eac3;                               ///< indicates if this is E-AC-3 vs. AC-3
     int bitstream_id;                       ///< bitstream id                           (bsid)
     int bitstream_mode;                     ///< bitstream mode                         (bsmod)
 
@@ -157,8 +158,8 @@ typedef struct AC3EncodeContext {
     int frame_size;                         ///< current frame size in bytes
     int frame_size_code;                    ///< frame size code                        (frmsizecod)
     uint16_t crc_inv[2];
-    int bits_written;                       ///< bit count    (used to avg. bitrate)
-    int samples_written;                    ///< sample count (used to avg. bitrate)
+    int64_t bits_written;                   ///< bit count    (used to avg. bitrate)
+    int64_t samples_written;                ///< sample count (used to avg. bitrate)
 
     int fbw_channels;                       ///< number of full-bandwidth channels      (nfchans)
     int channels;                           ///< total number of channels               (nchans)
@@ -247,15 +248,20 @@ static const float extmixlev_options[EXTMIXLEV_NUM_OPTIONS] = {
 #define OFFSET(param) offsetof(AC3EncodeContext, options.param)
 #define AC3ENC_PARAM (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 
-
 #define AC3ENC_TYPE_AC3_FIXED   0
 #define AC3ENC_TYPE_AC3         1
+#define AC3ENC_TYPE_EAC3        2
 
 #if CONFIG_AC3ENC_FLOAT
 #define AC3ENC_TYPE AC3ENC_TYPE_AC3
 #include "ac3enc_opts_template.c"
 static AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name,
                                 ac3_options, LIBAVUTIL_VERSION_INT };
+#undef AC3ENC_TYPE
+#define AC3ENC_TYPE AC3ENC_TYPE_EAC3
+#include "ac3enc_opts_template.c"
+static AVClass eac3enc_class = { "E-AC-3 Encoder", av_default_item_name,
+                                 eac3_options, LIBAVUTIL_VERSION_INT };
 #else
 #define AC3ENC_TYPE AC3ENC_TYPE_AC3_FIXED
 #include "ac3enc_opts_template.c"
@@ -387,7 +393,7 @@ static const int8_t ac3_coupling_start_tab[6][3][19] = {
 
 /**
  * Adjust the frame size to make the average bit rate match the target bit rate.
- * This is only needed for 11025, 22050, and 44100 sample rates.
+ * This is only needed for 11025, 22050, and 44100 sample rates or any E-AC-3.
  */
 static void adjust_frame_size(AC3EncodeContext *s)
 {
@@ -734,6 +740,35 @@ static void apply_channel_coupling(AC3EncodeContext *s)
             }
         }
     }
+
+    if (s->eac3) {
+        /* set first cpl coords */
+        int first_cpl_coords[AC3_MAX_CHANNELS];
+        for (ch = 1; ch <= s->fbw_channels; ch++)
+            first_cpl_coords[ch] = 1;
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+            AC3Block *block = &s->blocks[blk];
+            for (ch = 1; ch <= s->fbw_channels; ch++) {
+                if (block->channel_in_cpl[ch]) {
+                    if (first_cpl_coords[ch]) {
+                        block->new_cpl_coords = 2;
+                        first_cpl_coords[ch]  = 0;
+                    }
+                } else {
+                    first_cpl_coords[ch] = 1;
+                }
+            }
+        }
+
+        /* set first cpl leak */
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+            AC3Block *block = &s->blocks[blk];
+            if (block->cpl_in_use) {
+                block->new_cpl_leak = 2;
+                break;
+            }
+        }
+    }
 #endif /* CONFIG_AC3ENC_FLOAT */
 }
 
@@ -1151,23 +1186,50 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
      *   no delta bit allocation
      *   no skipped data
      *   no auxilliary data
+     *   no E-AC-3 metadata
      */
 
     /* header */
-    frame_bits = 65;
-    frame_bits += frame_bits_inc[s->channel_mode];
+    frame_bits = 16; /* sync info */
+    if (s->eac3) {
+        /* bitstream info header */
+        frame_bits += 35;
+        frame_bits += 1 + 1 + 1;
+        /* audio frame header */
+        frame_bits += 2;
+        frame_bits += 10;
+        /* exponent strategy */
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            frame_bits += 2 * s->fbw_channels + s->lfe_on;
+        /* converter exponent strategy */
+        frame_bits += s->fbw_channels * 5;
+        /* snr offsets */
+        frame_bits += 10;
+        /* block start info */
+        frame_bits++;
+    } else {
+        frame_bits += 49;
+        frame_bits += frame_bits_inc[s->channel_mode];
+    }
 
     /* audio blocks */
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        if (!s->eac3) {
         /* block switch flags */
         frame_bits += s->fbw_channels;
 
         /* dither flags */
         frame_bits += s->fbw_channels;
+        }
 
         /* dynamic range */
         frame_bits++;
 
+        /* spectral extension */
+        if (s->eac3)
+            frame_bits++;
+
+        if (!s->eac3) {
         /* exponent strategy */
         frame_bits += 2 * s->fbw_channels;
         if (s->lfe_on)
@@ -1177,12 +1239,19 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
         frame_bits++;
         if (!blk)
             frame_bits += 2 + 2 + 2 + 2 + 3;
+        }
 
+        /* converter snr offset */
+        if (s->eac3)
+            frame_bits++;
+
+        if (!s->eac3) {
         /* delta bit allocation */
         frame_bits++;
 
         /* skipped data */
         frame_bits++;
+        }
     }
 
     /* auxiliary data */
@@ -1207,7 +1276,7 @@ static void bit_alloc_init(AC3EncodeContext *s)
     s->slow_decay_code = 2;
     s->fast_decay_code = 1;
     s->slow_gain_code  = 1;
-    s->db_per_bit_code = 3;
+    s->db_per_bit_code = s->eac3 ? 2 : 3;
     s->floor_code      = 7;
     for (ch = 0; ch <= s->channels; ch++)
         s->fast_gain_code[ch] = 4;
@@ -1242,6 +1311,21 @@ static void count_frame_bits(AC3EncodeContext *s)
     int frame_bits = 0;
 
     /* header */
+    if (s->eac3) {
+        /* coupling */
+        if (s->channel_mode > AC3_CHMODE_MONO) {
+            frame_bits++;
+            for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
+                AC3Block *block = &s->blocks[blk];
+                frame_bits++;
+                if (block->new_cpl_strategy)
+                    frame_bits++;
+            }
+        }
+        /* coupling exponent strategy */
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            frame_bits += 2 * s->blocks[blk].cpl_in_use;
+    } else {
     if (opt->audio_production_info)
         frame_bits += 7;
     if (s->bitstream_id == 6) {
@@ -1250,20 +1334,29 @@ static void count_frame_bits(AC3EncodeContext *s)
         if (opt->extended_bsi_2)
             frame_bits += 14;
     }
+    }
 
     /* audio blocks */
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
 
         /* coupling strategy */
+        if (!s->eac3)
         frame_bits++;
         if (block->new_cpl_strategy) {
+            if (!s->eac3)
             frame_bits++;
             if (block->cpl_in_use) {
+                if (s->eac3)
+                    frame_bits++;
+                if (!s->eac3 || s->channel_mode != AC3_CHMODE_STEREO)
                 frame_bits += s->fbw_channels;
                 if (s->channel_mode == AC3_CHMODE_STEREO)
                     frame_bits++;
                 frame_bits += 4 + 4;
+                if (s->eac3)
+                    frame_bits++;
+                else
                 frame_bits += s->num_cpl_subbands - 1;
             }
         }
@@ -1272,6 +1365,7 @@ static void count_frame_bits(AC3EncodeContext *s)
         if (block->cpl_in_use) {
             for (ch = 1; ch <= s->fbw_channels; ch++) {
                 if (block->channel_in_cpl[ch]) {
+                    if (!s->eac3 || block->new_cpl_coords != 2)
                     frame_bits++;
                     if (block->new_cpl_coords) {
                         frame_bits += 2;
@@ -1283,6 +1377,7 @@ static void count_frame_bits(AC3EncodeContext *s)
 
         /* stereo rematrixing */
         if (s->channel_mode == AC3_CHMODE_STEREO) {
+            if (!s->eac3 || blk > 0)
             frame_bits++;
             if (s->blocks[blk].new_rematrixing_strategy)
                 frame_bits += block->num_rematrixing_bands;
@@ -1298,16 +1393,19 @@ static void count_frame_bits(AC3EncodeContext *s)
         }
 
         /* coupling exponent strategy */
-        if (block->cpl_in_use)
+        if (!s->eac3 && block->cpl_in_use)
             frame_bits += 2;
 
         /* snr offsets and fast gain codes */
+        if (!s->eac3) {
         frame_bits++;
         if (block->new_snr_offsets)
             frame_bits += 6 + (s->channels + block->cpl_in_use) * (4 + 3);
+        }
 
         /* coupling leak info */
         if (block->cpl_in_use) {
+            if (!s->eac3 || block->new_cpl_leak != 2)
             frame_bits++;
             if (block->new_cpl_leak)
                 frame_bits += 3 + 3;
@@ -1736,7 +1834,7 @@ static void quantize_mantissas(AC3EncodeContext *s)
 /**
  * Write the AC-3 frame header to the output bitstream.
  */
-static void output_frame_header(AC3EncodeContext *s)
+static void ac3_output_frame_header(AC3EncodeContext *s)
 {
     AC3EncOptions *opt = &s->options;
 
@@ -1789,6 +1887,79 @@ static void output_frame_header(AC3EncodeContext *s)
 }
 
 
+/**
+ * Write the E-AC-3 frame header to the output bitstream.
+ */
+static void eac3_output_frame_header(AC3EncodeContext *s)
+{
+    int blk, ch;
+    AC3EncOptions *opt = &s->options;
+
+    put_bits(&s->pb, 16, 0x0b77);                   /* sync word */
+
+    /* BSI header */
+    put_bits(&s->pb,  2, 0);                        /* stream type = independent */
+    put_bits(&s->pb,  3, 0);                        /* substream id = 0 */
+    put_bits(&s->pb, 11, (s->frame_size / 2) - 1);  /* frame size */
+    if (s->bit_alloc.sr_shift) {
+        put_bits(&s->pb, 2, 0x3);                   /* fscod2 */
+        put_bits(&s->pb, 2, s->bit_alloc.sr_code);  /* sample rate code */
+    } else {
+        put_bits(&s->pb, 2, s->bit_alloc.sr_code);  /* sample rate code */
+        put_bits(&s->pb, 2, 0x3);                   /* number of blocks = 6 */
+    }
+    put_bits(&s->pb, 3, s->channel_mode);           /* audio coding mode */
+    put_bits(&s->pb, 1, s->lfe_on);                 /* LFE channel indicator */
+    put_bits(&s->pb, 5, s->bitstream_id);           /* bitstream id (EAC3=16) */
+    put_bits(&s->pb, 5, -opt->dialogue_level);      /* dialogue normalization level */
+    put_bits(&s->pb, 1, 0);                         /* no compression gain */
+    put_bits(&s->pb, 1, 0);                         /* no mixing metadata */
+    /* TODO: mixing metadata */
+    put_bits(&s->pb, 1, 0);                         /* no info metadata */
+    /* TODO: info metadata */
+    put_bits(&s->pb, 1, 0);                         /* no additional bit stream info */
+
+    /* frame header */
+    put_bits(&s->pb, 1, 1);                         /* exponent strategy syntax = each block */
+    put_bits(&s->pb, 1, 0);                         /* aht enabled = no */
+    put_bits(&s->pb, 2, 0);                         /* snr offset strategy = 1 */
+    put_bits(&s->pb, 1, 0);                         /* transient pre-noise processing enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* block switch syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* dither flag syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* bit allocation model syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* fast gain codes enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* dba syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* skip field syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* spx enabled = no */
+    /* coupling strategy use flags */
+    if (s->channel_mode > AC3_CHMODE_MONO) {
+        put_bits(&s->pb, 1, s->blocks[0].cpl_in_use);
+        for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
+            AC3Block *block = &s->blocks[blk];
+            put_bits(&s->pb, 1, block->new_cpl_strategy);
+            if (block->new_cpl_strategy)
+                put_bits(&s->pb, 1, block->cpl_in_use);
+        }
+    }
+    /* exponent strategy */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+        for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++)
+            put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
+    if (s->lfe_on) {
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
+    }
+    /* E-AC-3 to AC-3 converter exponent strategy (unfortunately not optional...) */
+    for (ch = 1; ch <= s->fbw_channels; ch++)
+        put_bits(&s->pb, 5, 0);
+    /* snr offsets */
+    put_bits(&s->pb, 6, s->coarse_snr_offset);
+    put_bits(&s->pb, 4, s->fine_snr_offset[1]);
+    /* block start info */
+    put_bits(&s->pb, 1, 0);
+}
+
+
 /**
  * Write one audio block to the output bitstream.
  */
@@ -1799,32 +1970,51 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     AC3Block *block = &s->blocks[blk];
 
     /* block switching */
+    if (!s->eac3) {
     for (ch = 0; ch < s->fbw_channels; ch++)
         put_bits(&s->pb, 1, 0);
+    }
 
     /* dither flags */
+    if (!s->eac3) {
     for (ch = 0; ch < s->fbw_channels; ch++)
         put_bits(&s->pb, 1, 1);
+    }
 
     /* dynamic range codes */
     put_bits(&s->pb, 1, 0);
 
+    /* spectral extension */
+    if (s->eac3)
+        put_bits(&s->pb, 1, 0);
+
     /* channel coupling */
+    if (!s->eac3)
     put_bits(&s->pb, 1, block->new_cpl_strategy);
     if (block->new_cpl_strategy) {
+        if (!s->eac3)
         put_bits(&s->pb, 1, block->cpl_in_use);
         if (block->cpl_in_use) {
             int start_sub, end_sub;
+            if (s->eac3)
+                put_bits(&s->pb, 1, 0); /* enhanced coupling */
+            if (!s->eac3 || s->channel_mode != AC3_CHMODE_STEREO) {
             for (ch = 1; ch <= s->fbw_channels; ch++)
                 put_bits(&s->pb, 1, block->channel_in_cpl[ch]);
+            }
             if (s->channel_mode == AC3_CHMODE_STEREO)
                 put_bits(&s->pb, 1, 0); /* phase flags in use */
             start_sub = (s->start_freq[CPL_CH] - 37) / 12;
             end_sub   = (s->cpl_end_freq       - 37) / 12;
             put_bits(&s->pb, 4, start_sub);
             put_bits(&s->pb, 4, end_sub - 3);
+            /* coupling band structure */
+            if (s->eac3) {
+                put_bits(&s->pb, 1, 0); /* use default */
+            } else {
             for (bnd = start_sub+1; bnd < end_sub; bnd++)
                 put_bits(&s->pb, 1, ff_eac3_default_cpl_band_struct[bnd]);
+            }
         }
     }
 
@@ -1832,6 +2022,7 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     if (block->cpl_in_use) {
         for (ch = 1; ch <= s->fbw_channels; ch++) {
             if (block->channel_in_cpl[ch]) {
+                if (!s->eac3 || block->new_cpl_coords != 2)
                 put_bits(&s->pb, 1, block->new_cpl_coords);
                 if (block->new_cpl_coords) {
                     put_bits(&s->pb, 2, block->cpl_master_exp[ch]);
@@ -1846,6 +2037,7 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
 
     /* stereo rematrixing */
     if (s->channel_mode == AC3_CHMODE_STEREO) {
+        if (!s->eac3 || blk > 0)
         put_bits(&s->pb, 1, block->new_rematrixing_strategy);
         if (block->new_rematrixing_strategy) {
             /* rematrixing flags */
@@ -1855,10 +2047,12 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     }
 
     /* exponent strategy */
+    if (!s->eac3) {
     for (ch = !block->cpl_in_use; ch <= s->fbw_channels; ch++)
         put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
     if (s->lfe_on)
         put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
+    }
 
     /* bandwidth */
     for (ch = 1; ch <= s->fbw_channels; ch++) {
@@ -1888,6 +2082,7 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     }
 
     /* bit allocation info */
+    if (!s->eac3) {
     baie = (blk == 0);
     put_bits(&s->pb, 1, baie);
     if (baie) {
@@ -1897,8 +2092,10 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
         put_bits(&s->pb, 2, s->db_per_bit_code);
         put_bits(&s->pb, 3, s->floor_code);
     }
+    }
 
     /* snr offset */
+    if (!s->eac3) {
     put_bits(&s->pb, 1, block->new_snr_offsets);
     if (block->new_snr_offsets) {
         put_bits(&s->pb, 6, s->coarse_snr_offset);
@@ -1907,9 +2104,13 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
             put_bits(&s->pb, 3, s->fast_gain_code[ch]);
         }
     }
+    } else {
+        put_bits(&s->pb, 1, 0); /* no converter snr offset */
+    }
 
     /* coupling leak */
     if (block->cpl_in_use) {
+        if (!s->eac3 || block->new_cpl_leak != 2)
         put_bits(&s->pb, 1, block->new_cpl_leak);
         if (block->new_cpl_leak) {
             put_bits(&s->pb, 3, s->bit_alloc.cpl_fast_leak);
@@ -1917,8 +2118,10 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
         }
     }
 
+    if (!s->eac3) {
     put_bits(&s->pb, 1, 0); /* no delta bit allocation */
     put_bits(&s->pb, 1, 0); /* no data to skip */
+    }
 
     /* mantissas */
     got_cpl = !block->cpl_in_use;
@@ -2007,6 +2210,10 @@ static void output_frame_end(AC3EncodeContext *s)
     if (pad_bytes > 0)
         memset(put_bits_ptr(&s->pb), 0, pad_bytes);
 
+    if (s->eac3) {
+        /* compute crc2 */
+        crc2_partial = av_crc(crc_ctx, 0, frame + 2, s->frame_size - 5);
+    } else {
     /* compute crc1 */
     /* this is not so easy because it is at the beginning of the data... */
     crc1    = av_bswap16(av_crc(crc_ctx, 0, frame + 4, frame_size_58 - 4));
@@ -2017,6 +2224,7 @@ static void output_frame_end(AC3EncodeContext *s)
     /* compute crc2 */
     crc2_partial = av_crc(crc_ctx, 0, frame + frame_size_58,
                           s->frame_size - frame_size_58 - 3);
+    }
     crc2 = av_crc(crc_ctx, crc2_partial, frame + s->frame_size - 3, 1);
     /* ensure crc2 does not match sync word by flipping crcrsv bit if needed */
     if (crc2 == 0x770B) {
@@ -2037,7 +2245,10 @@ static void output_frame(AC3EncodeContext *s, unsigned char *frame)
 
     init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE);
 
-    output_frame_header(s);
+    if (s->eac3)
+        eac3_output_frame_header(s);
+    else
+        ac3_output_frame_header(s);
 
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
         output_audio_block(s, blk);
@@ -2058,6 +2269,7 @@ static void dprint_options(AVCodecContext *avctx)
     case  8:  av_strlcpy(strbuf, "AC-3 (standard)", 32);        break;
     case  9:  av_strlcpy(strbuf, "AC-3 (dnet half-rate)", 32);  break;
     case 10:  av_strlcpy(strbuf, "AC-3 (dnet quater-rate", 32); break;
+    case 16:  av_strlcpy(strbuf, "E-AC-3 (enhanced)", 32);      break;
     default: snprintf(strbuf, 32, "ERROR");
     }
     av_dlog(avctx, "bitstream_id: %s (%d)\n", strbuf, s->bitstream_id);
@@ -2318,13 +2530,13 @@ static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,
     const SampleType *samples = data;
     int ret;
 
-    if (s->options.allow_per_frame_metadata) {
+    if (!s->eac3 && s->options.allow_per_frame_metadata) {
         ret = validate_metadata(avctx);
         if (ret)
             return ret;
     }
 
-    if (s->bit_alloc.sr_code == 1)
+    if (s->bit_alloc.sr_code == 1 || s->eac3)
         adjust_frame_size(s);
 
     deinterleave_input_samples(s, samples);
@@ -2450,7 +2662,7 @@ static av_cold int set_channel_info(AC3EncodeContext *s, int channels,
 
 static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
 {
-    int i, ret;
+    int i, ret, max_sr;
 
     /* validate channel layout */
     if (!avctx->channel_layout) {
@@ -2465,20 +2677,59 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
     }
 
     /* validate sample rate */
-    for (i = 0; i < 9; i++) {
-        if ((ff_ac3_sample_rate_tab[i / 3] >> (i % 3)) == avctx->sample_rate)
+    /* note: max_sr could be changed from 2 to 5 for E-AC-3 once we find a
+             decoder that supports half sample rate so we can validate that
+             the generated files are correct. */
+    max_sr = s->eac3 ? 2 : 8;
+    for (i = 0; i <= max_sr; i++) {
+        if ((ff_ac3_sample_rate_tab[i % 3] >> (i / 3)) == avctx->sample_rate)
             break;
     }
-    if (i == 9) {
+    if (i > max_sr) {
         av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
         return AVERROR(EINVAL);
     }
     s->sample_rate        = avctx->sample_rate;
-    s->bit_alloc.sr_shift = i % 3;
-    s->bit_alloc.sr_code  = i / 3;
-    s->bitstream_id       = 8 + s->bit_alloc.sr_shift;
+    s->bit_alloc.sr_shift = i / 3;
+    s->bit_alloc.sr_code  = i % 3;
+    s->bitstream_id       = s->eac3 ? 16 : 8 + s->bit_alloc.sr_shift;
 
     /* validate bit rate */
+    if (s->eac3) {
+        int max_br, min_br, wpf, min_br_dist, min_br_code;
+
+        /* calculate min/max bitrate */
+        max_br = 2048 * s->sample_rate / AC3_FRAME_SIZE * 16;
+        min_br = ((s->sample_rate + (AC3_FRAME_SIZE-1)) / AC3_FRAME_SIZE) * 16;
+        if (avctx->bit_rate < min_br || avctx->bit_rate > max_br) {
+            av_log(avctx, AV_LOG_ERROR, "invalid bit rate. must be %d to %d "
+                   "for this sample rate\n", min_br, max_br);
+            return AVERROR(EINVAL);
+        }
+
+        /* calculate words-per-frame for the selected bitrate */
+        wpf = (avctx->bit_rate / 16) * AC3_FRAME_SIZE / s->sample_rate;
+        av_assert1(wpf > 0 && wpf <= 2048);
+
+        /* find the closest AC-3 bitrate code to the selected bitrate.
+           this is needed for lookup tables for bandwidth and coupling
+           parameter selection */
+        min_br_code = -1;
+        min_br_dist = INT_MAX;
+        for (i = 0; i < 19; i++) {
+            int br_dist = abs(ff_ac3_bitrate_tab[i] * 1000 - avctx->bit_rate);
+            if (br_dist < min_br_dist) {
+                min_br_dist = br_dist;
+                min_br_code = i;
+            }
+        }
+
+        /* make sure the minimum frame size is below the average frame size */
+        s->frame_size_code = min_br_code << 1;
+        while (wpf > 1 && wpf * s->sample_rate / AC3_FRAME_SIZE * 16 > avctx->bit_rate)
+            wpf--;
+        s->frame_size_min = 2 * wpf;
+    } else {
     for (i = 0; i < 19; i++) {
         if ((ff_ac3_bitrate_tab[i] >> s->bit_alloc.sr_shift)*1000 == avctx->bit_rate)
             break;
@@ -2487,8 +2738,11 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
         av_log(avctx, AV_LOG_ERROR, "invalid bit rate\n");
         return AVERROR(EINVAL);
     }
-    s->bit_rate        = avctx->bit_rate;
     s->frame_size_code = i << 1;
+    s->frame_size_min  = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code];
+    }
+    s->bit_rate   = avctx->bit_rate;
+    s->frame_size = s->frame_size_min;
 
     /* validate cutoff */
     if (avctx->cutoff < 0) {
@@ -2511,9 +2765,11 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
         return AVERROR(EINVAL);
     }
 
+    if (!s->eac3) {
     ret = validate_metadata(avctx);
     if (ret)
         return ret;
+    }
 
     s->rematrixing_enabled = s->options.stereo_rematrixing &&
                              (s->channel_mode == AC3_CHMODE_STEREO);
@@ -2713,6 +2969,8 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
     AC3EncodeContext *s = avctx->priv_data;
     int ret, frame_size_58;
 
+    s->eac3 = avctx->codec_id == CODEC_ID_EAC3;
+
     avctx->frame_size = AC3_FRAME_SIZE;
 
     ff_ac3_common_init();
@@ -2725,10 +2983,8 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
     if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE)
         s->bitstream_mode = 0x7;
 
-    s->frame_size_min  = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code];
     s->bits_written    = 0;
     s->samples_written = 0;
-    s->frame_size      = s->frame_size_min;
 
     /* calculate crc_inv for both possible frame sizes */
     frame_size_58 = (( s->frame_size    >> 2) + ( s->frame_size    >> 4)) << 1;
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 4f61440b52..012c31de5d 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -100,6 +100,7 @@ static void scale_coefficients(AC3EncodeContext *s)
 }
 
 
+#if CONFIG_AC3_ENCODER
 AVCodec ff_ac3_encoder = {
     "ac3",
     AVMEDIA_TYPE_AUDIO,
@@ -114,3 +115,20 @@ AVCodec ff_ac3_encoder = {
     .priv_class = &ac3enc_class,
     .channel_layouts = ac3_channel_layouts,
 };
+#endif
+
+#if CONFIG_EAC3_ENCODER
+AVCodec ff_eac3_encoder = {
+    .name            = "eac3",
+    .type            = AVMEDIA_TYPE_AUDIO,
+    .id              = CODEC_ID_EAC3,
+    .priv_data_size  = sizeof(AC3EncodeContext),
+    .init            = ac3_encode_init,
+    .encode          = ac3_encode_frame,
+    .close           = ac3_encode_close,
+    .sample_fmts     = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
+    .long_name       = NULL_IF_CONFIG_SMALL("ATSC A/52 E-AC-3"),
+    .priv_class      = &eac3enc_class,
+    .channel_layouts = ac3_channel_layouts,
+};
+#endif
diff --git a/libavcodec/ac3enc_opts_template.c b/libavcodec/ac3enc_opts_template.c
index e9c15f2758..699c1b5982 100644
--- a/libavcodec/ac3enc_opts_template.c
+++ b/libavcodec/ac3enc_opts_template.c
@@ -23,7 +23,10 @@
 static const AVOption ac3fixed_options[] = {
 #elif AC3ENC_TYPE == AC3ENC_TYPE_AC3
 static const AVOption ac3_options[] = {
+#else /* AC3ENC_TYPE_EAC3 */
+static const AVOption eac3_options[] = {
 #endif
+#if AC3ENC_TYPE != AC3ENC_TYPE_EAC3
 /* Metadata Options */
 {"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
 /* downmix levels */
@@ -37,7 +40,9 @@ static const AVOption ac3_options[] = {
     {"small",        "Small Room",              0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
 /* other metadata options */
 {"copyright", "Copyright Bit", OFFSET(copyright), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
+#endif
 {"dialnorm", "Dialogue Level (dB)", OFFSET(dialogue_level), FF_OPT_TYPE_INT, {.dbl = -31 }, -31, -1, AC3ENC_PARAM},
+#if AC3ENC_TYPE != AC3ENC_TYPE_EAC3
 {"dsur_mode", "Dolby Surround Mode", OFFSET(dolby_surround_mode), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 2, AC3ENC_PARAM, "dsur_mode"},
     {"notindicated", "Not Indicated (default)",    0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
     {"on",           "Dolby Surround Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
@@ -63,6 +68,7 @@ static const AVOption ac3_options[] = {
 {"ad_conv_type", "A/D Converter Type", OFFSET(ad_converter_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 1, AC3ENC_PARAM, "ad_conv_type"},
     {"standard", "Standard (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
     {"hdcd",     "HDCD",               0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
+#endif
 /* Other Encoding Options */
 {"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
 #if AC3ENC_TYPE != AC3ENC_TYPE_AC3_FIXED
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index f063369dfd..71b6094089 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -241,7 +241,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER (COOK, cook);
     REGISTER_DECODER (DCA, dca);
     REGISTER_DECODER (DSICINAUDIO, dsicinaudio);
-    REGISTER_DECODER (EAC3, eac3);
+    REGISTER_ENCDEC  (EAC3, eac3);
     REGISTER_ENCDEC  (FLAC, flac);
     REGISTER_DECODER (GSM, gsm);
     REGISTER_DECODER (GSM_MS, gsm_ms);
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 0def0d1ff9..894188b093 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -21,7 +21,7 @@
 #define AVCODEC_VERSION_H
 
 #define LIBAVCODEC_VERSION_MAJOR 53
-#define LIBAVCODEC_VERSION_MINOR  4
+#define LIBAVCODEC_VERSION_MINOR  5
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \

From 1a950da6a2d981d53f61d42f3a078888b00db091 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 24 May 2011 19:38:30 -0400
Subject: [PATCH 388/830] cosmetics: indentation and alignment after previous
 commit

---
 libavcodec/ac3enc.c | 174 ++++++++++++++++++++++----------------------
 1 file changed, 87 insertions(+), 87 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 07ceda0446..5285874caa 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -1215,11 +1215,11 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
     /* audio blocks */
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         if (!s->eac3) {
-        /* block switch flags */
-        frame_bits += s->fbw_channels;
+            /* block switch flags */
+            frame_bits += s->fbw_channels;
 
-        /* dither flags */
-        frame_bits += s->fbw_channels;
+            /* dither flags */
+            frame_bits += s->fbw_channels;
         }
 
         /* dynamic range */
@@ -1230,15 +1230,15 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
             frame_bits++;
 
         if (!s->eac3) {
-        /* exponent strategy */
-        frame_bits += 2 * s->fbw_channels;
-        if (s->lfe_on)
-            frame_bits++;
+            /* exponent strategy */
+            frame_bits += 2 * s->fbw_channels;
+            if (s->lfe_on)
+                frame_bits++;
 
-        /* bit allocation params */
-        frame_bits++;
-        if (!blk)
-            frame_bits += 2 + 2 + 2 + 2 + 3;
+            /* bit allocation params */
+            frame_bits++;
+            if (!blk)
+                frame_bits += 2 + 2 + 2 + 2 + 3;
         }
 
         /* converter snr offset */
@@ -1246,11 +1246,11 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
             frame_bits++;
 
         if (!s->eac3) {
-        /* delta bit allocation */
-        frame_bits++;
+            /* delta bit allocation */
+            frame_bits++;
 
-        /* skipped data */
-        frame_bits++;
+            /* skipped data */
+            frame_bits++;
         }
     }
 
@@ -1326,14 +1326,14 @@ static void count_frame_bits(AC3EncodeContext *s)
         for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
             frame_bits += 2 * s->blocks[blk].cpl_in_use;
     } else {
-    if (opt->audio_production_info)
-        frame_bits += 7;
-    if (s->bitstream_id == 6) {
-        if (opt->extended_bsi_1)
-            frame_bits += 14;
-        if (opt->extended_bsi_2)
-            frame_bits += 14;
-    }
+        if (opt->audio_production_info)
+            frame_bits += 7;
+        if (s->bitstream_id == 6) {
+            if (opt->extended_bsi_1)
+                frame_bits += 14;
+            if (opt->extended_bsi_2)
+                frame_bits += 14;
+        }
     }
 
     /* audio blocks */
@@ -1342,22 +1342,22 @@ static void count_frame_bits(AC3EncodeContext *s)
 
         /* coupling strategy */
         if (!s->eac3)
-        frame_bits++;
+            frame_bits++;
         if (block->new_cpl_strategy) {
             if (!s->eac3)
-            frame_bits++;
+                frame_bits++;
             if (block->cpl_in_use) {
                 if (s->eac3)
                     frame_bits++;
                 if (!s->eac3 || s->channel_mode != AC3_CHMODE_STEREO)
-                frame_bits += s->fbw_channels;
+                    frame_bits += s->fbw_channels;
                 if (s->channel_mode == AC3_CHMODE_STEREO)
                     frame_bits++;
                 frame_bits += 4 + 4;
                 if (s->eac3)
                     frame_bits++;
                 else
-                frame_bits += s->num_cpl_subbands - 1;
+                    frame_bits += s->num_cpl_subbands - 1;
             }
         }
 
@@ -1366,7 +1366,7 @@ static void count_frame_bits(AC3EncodeContext *s)
             for (ch = 1; ch <= s->fbw_channels; ch++) {
                 if (block->channel_in_cpl[ch]) {
                     if (!s->eac3 || block->new_cpl_coords != 2)
-                    frame_bits++;
+                        frame_bits++;
                     if (block->new_cpl_coords) {
                         frame_bits += 2;
                         frame_bits += (4 + 4) * s->num_cpl_bands;
@@ -1378,7 +1378,7 @@ static void count_frame_bits(AC3EncodeContext *s)
         /* stereo rematrixing */
         if (s->channel_mode == AC3_CHMODE_STEREO) {
             if (!s->eac3 || blk > 0)
-            frame_bits++;
+                frame_bits++;
             if (s->blocks[blk].new_rematrixing_strategy)
                 frame_bits += block->num_rematrixing_bands;
         }
@@ -1398,15 +1398,15 @@ static void count_frame_bits(AC3EncodeContext *s)
 
         /* snr offsets and fast gain codes */
         if (!s->eac3) {
-        frame_bits++;
-        if (block->new_snr_offsets)
-            frame_bits += 6 + (s->channels + block->cpl_in_use) * (4 + 3);
+            frame_bits++;
+            if (block->new_snr_offsets)
+                frame_bits += 6 + (s->channels + block->cpl_in_use) * (4 + 3);
         }
 
         /* coupling leak info */
         if (block->cpl_in_use) {
             if (!s->eac3 || block->new_cpl_leak != 2)
-            frame_bits++;
+                frame_bits++;
             if (block->new_cpl_leak)
                 frame_bits += 3 + 3;
         }
@@ -1971,14 +1971,14 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
 
     /* block switching */
     if (!s->eac3) {
-    for (ch = 0; ch < s->fbw_channels; ch++)
-        put_bits(&s->pb, 1, 0);
+        for (ch = 0; ch < s->fbw_channels; ch++)
+            put_bits(&s->pb, 1, 0);
     }
 
     /* dither flags */
     if (!s->eac3) {
-    for (ch = 0; ch < s->fbw_channels; ch++)
-        put_bits(&s->pb, 1, 1);
+        for (ch = 0; ch < s->fbw_channels; ch++)
+            put_bits(&s->pb, 1, 1);
     }
 
     /* dynamic range codes */
@@ -1990,17 +1990,17 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
 
     /* channel coupling */
     if (!s->eac3)
-    put_bits(&s->pb, 1, block->new_cpl_strategy);
+        put_bits(&s->pb, 1, block->new_cpl_strategy);
     if (block->new_cpl_strategy) {
         if (!s->eac3)
-        put_bits(&s->pb, 1, block->cpl_in_use);
+            put_bits(&s->pb, 1, block->cpl_in_use);
         if (block->cpl_in_use) {
             int start_sub, end_sub;
             if (s->eac3)
                 put_bits(&s->pb, 1, 0); /* enhanced coupling */
             if (!s->eac3 || s->channel_mode != AC3_CHMODE_STEREO) {
-            for (ch = 1; ch <= s->fbw_channels; ch++)
-                put_bits(&s->pb, 1, block->channel_in_cpl[ch]);
+                for (ch = 1; ch <= s->fbw_channels; ch++)
+                    put_bits(&s->pb, 1, block->channel_in_cpl[ch]);
             }
             if (s->channel_mode == AC3_CHMODE_STEREO)
                 put_bits(&s->pb, 1, 0); /* phase flags in use */
@@ -2012,8 +2012,8 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
             if (s->eac3) {
                 put_bits(&s->pb, 1, 0); /* use default */
             } else {
-            for (bnd = start_sub+1; bnd < end_sub; bnd++)
-                put_bits(&s->pb, 1, ff_eac3_default_cpl_band_struct[bnd]);
+                for (bnd = start_sub+1; bnd < end_sub; bnd++)
+                    put_bits(&s->pb, 1, ff_eac3_default_cpl_band_struct[bnd]);
             }
         }
     }
@@ -2023,7 +2023,7 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
         for (ch = 1; ch <= s->fbw_channels; ch++) {
             if (block->channel_in_cpl[ch]) {
                 if (!s->eac3 || block->new_cpl_coords != 2)
-                put_bits(&s->pb, 1, block->new_cpl_coords);
+                    put_bits(&s->pb, 1, block->new_cpl_coords);
                 if (block->new_cpl_coords) {
                     put_bits(&s->pb, 2, block->cpl_master_exp[ch]);
                     for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
@@ -2038,7 +2038,7 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     /* stereo rematrixing */
     if (s->channel_mode == AC3_CHMODE_STEREO) {
         if (!s->eac3 || blk > 0)
-        put_bits(&s->pb, 1, block->new_rematrixing_strategy);
+            put_bits(&s->pb, 1, block->new_rematrixing_strategy);
         if (block->new_rematrixing_strategy) {
             /* rematrixing flags */
             for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++)
@@ -2048,10 +2048,10 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
 
     /* exponent strategy */
     if (!s->eac3) {
-    for (ch = !block->cpl_in_use; ch <= s->fbw_channels; ch++)
-        put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
-    if (s->lfe_on)
-        put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
+        for (ch = !block->cpl_in_use; ch <= s->fbw_channels; ch++)
+            put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
+        if (s->lfe_on)
+            put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
     }
 
     /* bandwidth */
@@ -2083,27 +2083,27 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
 
     /* bit allocation info */
     if (!s->eac3) {
-    baie = (blk == 0);
-    put_bits(&s->pb, 1, baie);
-    if (baie) {
-        put_bits(&s->pb, 2, s->slow_decay_code);
-        put_bits(&s->pb, 2, s->fast_decay_code);
-        put_bits(&s->pb, 2, s->slow_gain_code);
-        put_bits(&s->pb, 2, s->db_per_bit_code);
-        put_bits(&s->pb, 3, s->floor_code);
-    }
+        baie = (blk == 0);
+        put_bits(&s->pb, 1, baie);
+        if (baie) {
+            put_bits(&s->pb, 2, s->slow_decay_code);
+            put_bits(&s->pb, 2, s->fast_decay_code);
+            put_bits(&s->pb, 2, s->slow_gain_code);
+            put_bits(&s->pb, 2, s->db_per_bit_code);
+            put_bits(&s->pb, 3, s->floor_code);
+        }
     }
 
     /* snr offset */
     if (!s->eac3) {
-    put_bits(&s->pb, 1, block->new_snr_offsets);
-    if (block->new_snr_offsets) {
-        put_bits(&s->pb, 6, s->coarse_snr_offset);
-        for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
-            put_bits(&s->pb, 4, s->fine_snr_offset[ch]);
-            put_bits(&s->pb, 3, s->fast_gain_code[ch]);
+        put_bits(&s->pb, 1, block->new_snr_offsets);
+        if (block->new_snr_offsets) {
+            put_bits(&s->pb, 6, s->coarse_snr_offset);
+            for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
+                put_bits(&s->pb, 4, s->fine_snr_offset[ch]);
+                put_bits(&s->pb, 3, s->fast_gain_code[ch]);
+            }
         }
-    }
     } else {
         put_bits(&s->pb, 1, 0); /* no converter snr offset */
     }
@@ -2111,7 +2111,7 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     /* coupling leak */
     if (block->cpl_in_use) {
         if (!s->eac3 || block->new_cpl_leak != 2)
-        put_bits(&s->pb, 1, block->new_cpl_leak);
+            put_bits(&s->pb, 1, block->new_cpl_leak);
         if (block->new_cpl_leak) {
             put_bits(&s->pb, 3, s->bit_alloc.cpl_fast_leak);
             put_bits(&s->pb, 3, s->bit_alloc.cpl_slow_leak);
@@ -2119,8 +2119,8 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     }
 
     if (!s->eac3) {
-    put_bits(&s->pb, 1, 0); /* no delta bit allocation */
-    put_bits(&s->pb, 1, 0); /* no data to skip */
+        put_bits(&s->pb, 1, 0); /* no delta bit allocation */
+        put_bits(&s->pb, 1, 0); /* no data to skip */
     }
 
     /* mantissas */
@@ -2265,11 +2265,11 @@ static void dprint_options(AVCodecContext *avctx)
     char strbuf[32];
 
     switch (s->bitstream_id) {
-    case  6:  av_strlcpy(strbuf, "AC-3 (alt syntax)", 32);      break;
-    case  8:  av_strlcpy(strbuf, "AC-3 (standard)", 32);        break;
-    case  9:  av_strlcpy(strbuf, "AC-3 (dnet half-rate)", 32);  break;
-    case 10:  av_strlcpy(strbuf, "AC-3 (dnet quater-rate", 32); break;
-    case 16:  av_strlcpy(strbuf, "E-AC-3 (enhanced)", 32);      break;
+    case  6:  av_strlcpy(strbuf, "AC-3 (alt syntax)",       32); break;
+    case  8:  av_strlcpy(strbuf, "AC-3 (standard)",         32); break;
+    case  9:  av_strlcpy(strbuf, "AC-3 (dnet half-rate)",   32); break;
+    case 10:  av_strlcpy(strbuf, "AC-3 (dnet quater-rate)", 32); break;
+    case 16:  av_strlcpy(strbuf, "E-AC-3 (enhanced)",       32); break;
     default: snprintf(strbuf, 32, "ERROR");
     }
     av_dlog(avctx, "bitstream_id: %s (%d)\n", strbuf, s->bitstream_id);
@@ -2730,16 +2730,16 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
             wpf--;
         s->frame_size_min = 2 * wpf;
     } else {
-    for (i = 0; i < 19; i++) {
-        if ((ff_ac3_bitrate_tab[i] >> s->bit_alloc.sr_shift)*1000 == avctx->bit_rate)
-            break;
-    }
-    if (i == 19) {
-        av_log(avctx, AV_LOG_ERROR, "invalid bit rate\n");
-        return AVERROR(EINVAL);
-    }
-    s->frame_size_code = i << 1;
-    s->frame_size_min  = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code];
+        for (i = 0; i < 19; i++) {
+            if ((ff_ac3_bitrate_tab[i] >> s->bit_alloc.sr_shift)*1000 == avctx->bit_rate)
+                break;
+        }
+        if (i == 19) {
+            av_log(avctx, AV_LOG_ERROR, "invalid bit rate\n");
+            return AVERROR(EINVAL);
+        }
+        s->frame_size_code = i << 1;
+        s->frame_size_min  = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code];
     }
     s->bit_rate   = avctx->bit_rate;
     s->frame_size = s->frame_size_min;
@@ -2766,9 +2766,9 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
     }
 
     if (!s->eac3) {
-    ret = validate_metadata(avctx);
-    if (ret)
-        return ret;
+        ret = validate_metadata(avctx);
+        if (ret)
+            return ret;
     }
 
     s->rematrixing_enabled = s->options.stereo_rematrixing &&

From 2e0e1e712aa263d919ebb842567977195dfa4774 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 09:02:11 +0200
Subject: [PATCH 389/830] libdc1394: add a pixel_format private option.

---
 libavdevice/libdc1394.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index 4462262c93..f4340f570c 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -25,6 +25,7 @@
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
 
 #if HAVE_LIBDC1394_2
 #include <dc1394/dc1394.h>
@@ -61,6 +62,7 @@ typedef struct dc1394_data {
     int current_frame;
     int fps;
     char *video_size;       /**< String describing video size, set by a private option. */
+    char *pixel_format;     /**< Set by a private option. */
 
     AVPacket packet;
 } dc1394_data;
@@ -99,6 +101,7 @@ static const AVOption options[] = {
     { "channel", "", offsetof(dc1394_data, channel), FF_OPT_TYPE_INT, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
 #endif
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "qvga"}, 0, 0, DEC },
+    { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = "uyvy422"}, 0, 0, DEC },
     { NULL },
 };
 
@@ -117,11 +120,17 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     AVStream* vst;
     struct dc1394_frame_format *fmt;
     struct dc1394_frame_rate *fps;
-    enum PixelFormat pix_fmt = ap->pix_fmt == PIX_FMT_NONE ? PIX_FMT_UYVY422 : ap->pix_fmt; /* defaults */
+    enum PixelFormat pix_fmt;
     int width, height;
     int frame_rate           = !ap->time_base.num ? 30000 : av_rescale(1000, ap->time_base.den, ap->time_base.num);
     int ret = 0;
 
+    if ((pix_fmt = av_get_pix_fmt(dc1394->pixel_format)) == PIX_FMT_NONE) {
+        av_log(c, AV_LOG_ERROR, "No such pixel format: %s.\n", dc1394->pixel_format);
+        ret = AVERROR(EINVAL);
+        goto out;
+    }
+
     if ((ret = av_parse_video_size(&width, &height, dc1394->video_size)) < 0) {
         av_log(c, AV_LOG_ERROR, "Couldn't parse video size.\n");
         goto out;
@@ -131,6 +140,8 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
         width = ap->width;
     if (ap->height > 0)
         height = ap->height;
+    if (ap->pix_fmt)
+        pix_fmt = ap->pix_fmt;
 #endif
 
     for (fmt = dc1394_frame_formats; fmt->width; fmt++)
@@ -177,6 +188,7 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     *select_fmt = fmt;
 out:
     av_freep(&dc1394->video_size);
+    av_freep(&dc1394->pixel_format);
     return ret;
 }
 

From d576bbf3eb0eb2a421f0c3efb2b78a47a1826cf5 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 09:02:11 +0200
Subject: [PATCH 390/830] v4l2: add a pixel_format private option.

---
 libavdevice/v4l2.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index f5baf08b42..e0186f8031 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -47,6 +47,7 @@
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
 
 static const int desired_video_buffers = 256;
 
@@ -71,6 +72,7 @@ struct video_data {
     char *standard;
     int channel;
     char *video_size; /**< String describing video size, set by a private option. */
+    char *pixel_format; /**< Set by a private option. */
 };
 
 struct buff_data {
@@ -544,12 +546,12 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
 }
 
 static uint32_t device_try_init(AVFormatContext *s1,
-                                const AVFormatParameters *ap,
+                                enum PixelFormat pix_fmt,
                                 int *width,
                                 int *height,
                                 enum CodecID *codec_id)
 {
-    uint32_t desired_format = fmt_ff2v4l(ap->pix_fmt, s1->video_codec_id);
+    uint32_t desired_format = fmt_ff2v4l(pix_fmt, s1->video_codec_id);
 
     if (desired_format == 0 ||
         device_init(s1, width, height, desired_format) < 0) {
@@ -582,6 +584,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     int res = 0;
     uint32_t desired_format, capabilities;
     enum CodecID codec_id;
+    enum PixelFormat pix_fmt = PIX_FMT_NONE;
 
     st = av_new_stream(s1, 0);
     if (!st) {
@@ -594,11 +597,18 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
         goto out;
     }
+    if (s->pixel_format && (pix_fmt = av_get_pix_fmt(s->pixel_format)) == PIX_FMT_NONE) {
+        av_log(s1, AV_LOG_ERROR, "No such pixel format: %s.\n", s->pixel_format);
+        res = AVERROR(EINVAL);
+        goto out;
+    }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->width > 0)
         s->width  = ap->width;
     if (ap->height > 0)
         s->height = ap->height;
+    if (ap->pix_fmt)
+        pix_fmt = ap->pix_fmt;
 #endif
 
     capabilities = 0;
@@ -624,7 +634,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         av_log(s1, AV_LOG_VERBOSE, "Setting frame size to %dx%d\n", s->width, s->height);
     }
 
-    desired_format = device_try_init(s1, ap, &s->width, &s->height, &codec_id);
+    desired_format = device_try_init(s1, pix_fmt, &s->width, &s->height, &codec_id);
     if (desired_format == 0) {
         av_log(s1, AV_LOG_ERROR, "Cannot find a proper format for "
                "codec_id %d, pix_fmt %d.\n", s1->video_codec_id, ap->pix_fmt);
@@ -670,6 +680,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 
 out:
     av_freep(&s->video_size);
+    av_freep(&s->pixel_format);
     return res;
 }
 
@@ -719,6 +730,7 @@ static const AVOption options[] = {
     { "standard", "", offsetof(struct video_data, standard), FF_OPT_TYPE_STRING, {.str = "NTSC" }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
     { "channel",  "", offsetof(struct video_data, channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { NULL },
 };
 

From 2a85f218680832bc8a22155c8d5225bdb9c10279 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 09:02:11 +0200
Subject: [PATCH 391/830] rawdec: add a pixel_format private option.

---
 libavformat/rawdec.c | 29 +++++++++++++++++++----------
 libavformat/rawdec.h |  1 +
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index 434ed48f05..4f974d7045 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -25,6 +25,7 @@
 #include "rawdec.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
 
 /* raw input */
 int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
@@ -70,30 +71,37 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
         case AVMEDIA_TYPE_VIDEO: {
             FFRawVideoDemuxerContext *s1 = s->priv_data;
             int width = 0, height = 0, ret;
+            enum PixelFormat pix_fmt;
+
             if(ap->time_base.num)
                 av_set_pts_info(st, 64, ap->time_base.num, ap->time_base.den);
             else
                 av_set_pts_info(st, 64, 1, 25);
-            if (s1->video_size) {
-                ret = av_parse_video_size(&width, &height, s1->video_size);
-                av_freep(&s1->video_size);
-                if (ret < 0) {
-                    av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n");
-                    return ret;
-                }
+            if (s1->video_size && (ret = av_parse_video_size(&width, &height, s1->video_size)) < 0) {
+                av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n");
+                goto fail;
+            }
+            if ((pix_fmt = av_get_pix_fmt(s1->pixel_format)) == PIX_FMT_NONE) {
+                av_log(s, AV_LOG_ERROR, "No such pixel format: %s.\n", s1->pixel_format);
+                ret = AVERROR(EINVAL);
+                goto fail;
             }
 #if FF_API_FORMAT_PARAMETERS
             if (ap->width > 0)
                 width = ap->width;
             if (ap->height > 0)
                 height = ap->height;
+            if (ap->pix_fmt)
+                pix_fmt = ap->pix_fmt;
 #endif
             st->codec->width  = width;
             st->codec->height = height;
-            st->codec->pix_fmt = ap->pix_fmt;
-            if(st->codec->pix_fmt == PIX_FMT_NONE)
-                st->codec->pix_fmt= PIX_FMT_YUV420P;
+            st->codec->pix_fmt = pix_fmt;
             break;
+fail:
+            av_freep(&s1->video_size);
+            av_freep(&s1->pixel_format);
+            return ret;
             }
         default:
             return -1;
@@ -187,6 +195,7 @@ const AVClass ff_rawaudio_demuxer_class = {
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption video_options[] = {
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = "yuv420p"}, 0, 0, DEC },
     { NULL },
 };
 #undef OFFSET
diff --git a/libavformat/rawdec.h b/libavformat/rawdec.h
index 99beadd0e1..4968915cc6 100644
--- a/libavformat/rawdec.h
+++ b/libavformat/rawdec.h
@@ -34,6 +34,7 @@ typedef struct RawAudioDemuxerContext {
 typedef struct FFRawVideoDemuxerContext {
     const AVClass *class;     /**< Class for private options. */
     char *video_size;         /**< String describing video size, set by a private option. */
+    char *pixel_format;       /**< Set by a private option. */
 } FFRawVideoDemuxerContext;
 
 extern const AVClass ff_rawaudio_demuxer_class;

From f91ccb1df6fe6d043bd88eabc916be1093b6677b Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 27 May 2011 07:38:49 +0200
Subject: [PATCH 392/830] lavf: deprecate AVFormatParameters.pix_fmt.

---
 libavformat/avformat.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 849a85d1cc..27cd0f7763 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -233,9 +233,7 @@ typedef struct AVFormatParameters {
     attribute_deprecated int channels;
     attribute_deprecated int width;
     attribute_deprecated int height;
-#endif
-    enum PixelFormat pix_fmt;
-#if FF_API_FORMAT_PARAMETERS
+    attribute_deprecated enum PixelFormat pix_fmt;
     attribute_deprecated int channel; /**< Used to select DV channel. */
     attribute_deprecated const char *standard; /**< deprecated, use demuxer-specific options instead. */
     attribute_deprecated unsigned int mpeg2ts_raw:1;  /**< deprecated, use mpegtsraw demuxer */

From fa12fb3b00cb4e08a38a4fb192293347c218b3d3 Mon Sep 17 00:00:00 2001
From: Kieran Kunhya <kieran@kunhya.com>
Date: Fri, 4 Mar 2011 15:09:32 +0000
Subject: [PATCH 393/830] Output MPEG-TS stream identifiers.

with changes by michael to simplify API
---
 libavformat/avformat.h | 7 +++++++
 libavformat/mpegts.c   | 3 +++
 2 files changed, 10 insertions(+)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 59123ca101..6381526a20 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -606,6 +606,13 @@ typedef struct AVStream {
      */
     int codec_info_nb_frames;
 
+    /**
+     * Stream Identifier
+     * This is the MPEG-TS stream identifier +1
+     * 0 means unknown
+     */
+    int stream_identifier;
+
     /**
      * Stream informations used internally by av_find_stream_info()
      */
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index ea11a3eaf8..9511ede08d 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -994,6 +994,9 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type
             stream_type == STREAM_TYPE_PRIVATE_DATA)
             mpegts_find_stream_type(st, st->codec->codec_tag, REGD_types);
         break;
+    case 0x52: /* stream identifier descriptor */
+        st->stream_identifier = 1 + get8(pp, desc_end);
+        break;
     default:
         break;
     }

From 24adef142f4901abf7e92e79d20b79f332d5c54f Mon Sep 17 00:00:00 2001
From: Kieran Kunhya <kieran@kunhya.com>
Date: Fri, 4 Mar 2011 19:22:09 +0000
Subject: [PATCH 394/830] Export more transport stream information.

with minor addition to the comment by michael
---
 libavformat/avformat.h | 9 +++++++++
 libavformat/mpegts.c   | 7 ++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 6381526a20..783039bcb2 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -647,6 +647,9 @@ typedef struct AVProgram {
     unsigned int   *stream_index;
     unsigned int   nb_stream_indexes;
     AVMetadata *metadata;
+
+    int program_num;
+    int pmt_pid;
 } AVProgram;
 
 #define AVFMTCTX_NOHEADER      0x0001 /**< signal that no header is present
@@ -844,6 +847,12 @@ typedef struct AVFormatContext {
      * decoding: number of frames used to probe fps
      */
     int fps_probe_size;
+
+    /**
+     * Transport stream id.
+     * This will be moved into demuxer private options. Thus no API/ABI compatibility
+     */
+    int ts_id;
 } AVFormatContext;
 
 typedef struct AVPacketList {
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 9511ede08d..b5f25aa40d 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -1137,6 +1137,7 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
     SectionHeader h1, *h = &h1;
     const uint8_t *p, *p_end;
     int sid, pmt_pid;
+    AVProgram *program;
 
 #ifdef DEBUG
     av_dlog(ts->stream, "PAT:\n");
@@ -1149,6 +1150,8 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
     if (h->tid != PAT_TID)
         return;
 
+    ts->stream->ts_id = h->id;
+
     clear_programs(ts);
     for(;;) {
         sid = get16(&p, p_end);
@@ -1163,7 +1166,9 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
         if (sid == 0x0000) {
             /* NIT info */
         } else {
-            av_new_program(ts->stream, sid);
+            program = av_new_program(ts->stream, sid);
+            program->program_num = sid;
+            program->pmt_pid = pmt_pid;
             if (ts->pids[pmt_pid])
                 mpegts_close_filter(ts, ts->pids[pmt_pid]);
             mpegts_open_section_filter(ts, pmt_pid, pmt_cb, ts, 1);

From 5501afa6ee857693bf4fe745792ab7b79477f731 Mon Sep 17 00:00:00 2001
From: Kieran Kunhya <kieran@kunhya.com>
Date: Fri, 11 Mar 2011 12:39:55 +0000
Subject: [PATCH 395/830] Export PCR pid

---
 libavformat/avformat.h |  1 +
 libavformat/mpegts.c   | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 783039bcb2..292479e3d6 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -650,6 +650,7 @@ typedef struct AVProgram {
 
     int program_num;
     int pmt_pid;
+    int pcr_pid;
 } AVProgram;
 
 #define AVFMTCTX_NOHEADER      0x0001 /**< signal that no header is present
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index b5f25aa40d..df7a89979d 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -221,6 +221,17 @@ static void add_pid_to_pmt(MpegTSContext *ts, unsigned int programid, unsigned i
     p->pids[p->nb_pids++] = pid;
 }
 
+static void set_pcr_pid(AVFormatContext *s, unsigned int programid, unsigned int pid)
+{ /* Store pid as the pcr_pid of the first program in s whose id equals programid. */
+    int i;
+    for(i=0; i<s->nb_programs; i++) { /* linear scan over s->programs */
+        if(s->programs[i]->id == programid) { /* match on the program's id field */
+            s->programs[i]->pcr_pid = pid; /* record the PCR pid on that program */
+            break; /* only the first matching program is updated */
+        }
+    } /* if no program matches, nothing is written */
+}
+
 /**
  * \brief discard_pid() decides if the pid is to be discarded according
  *                      to caller's programs selection
@@ -1039,6 +1050,7 @@ static void pmt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
     if (pcr_pid < 0)
         return;
     add_pid_to_pmt(ts, h->id, pcr_pid);
+    set_pcr_pid(ts->stream, h->id, pcr_pid);
 
     av_dlog(ts->stream, "pcr_pid=0x%x\n", pcr_pid);
 

From 11f2eae2aeb3c5b8a1f59efb9ccd9144b3aa074f Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 26 May 2011 01:40:56 +0200
Subject: [PATCH 396/830] swscale: Remove commented-out printf cruft.

---
 libswscale/swscale-test.c         | 6 ------
 libswscale/swscale_template.c     | 2 --
 libswscale/utils.c                | 1 -
 libswscale/x86/swscale_template.c | 1 -
 4 files changed, 10 deletions(-)

diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c
index 7f171ea725..888cbab26a 100644
--- a/libswscale/swscale-test.c
+++ b/libswscale/swscale-test.c
@@ -58,15 +58,11 @@ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, i
     int x,y;
     uint64_t ssd=0;
 
-//printf("%d %d\n", w, h);
-
     for (y=0; y<h; y++) {
         for (x=0; x<w; x++) {
             int d= src1[x + y*stride1] - src2[x + y*stride2];
             ssd+= d*d;
-//printf("%d", abs(src1[x + y*stride1] - src2[x + y*stride2])/26 );
         }
-//printf("\n");
     }
     return ssd;
 }
@@ -162,8 +158,6 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
 
         goto end;
     }
-//    printf("test %X %X %X -> %X %X %X\n", (int)ref[0], (int)ref[1], (int)ref[2],
-//        (int)src[0], (int)src[1], (int)src[2]);
 
     printf(" %s %dx%d -> %s %3dx%3d flags=%2d",
            av_pix_fmt_descriptors[srcFormat].name, srcW, srcH,
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 5a84ceb871..6f034c9d6f 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -352,9 +352,7 @@ static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
         int j;
         int srcPos= filterPos[i];
         int val=0;
-        //printf("filterPos: %d\n", filterPos[i]);
         for (j=0; j<filterSize; j++) {
-            //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
             val += ((int)src[srcPos + j])*filter[filterSize*i + j];
         }
         //filter += hFilterSize;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 5eac356340..eba7f82d9e 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -175,7 +175,6 @@ const char *sws_format_name(enum PixelFormat format)
 
 static double getSplineCoeff(double a, double b, double c, double d, double dist)
 {
-//    printf("%f %f %f %f %f\n", a,b,c,d,dist);
     if (dist<=1.0) return ((d*dist + c)*dist + b)*dist +a;
     else           return getSplineCoeff(        0.0,
                                           b+ 2.0*c + 3.0*d,
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 99b4413f90..43119de6eb 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2190,7 +2190,6 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
 #endif
         );
         for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
-            //printf("%d %d %d\n", dstWidth, i, srcW);
             dst[i] = src1[srcW-1]*128;
             dst[i+VOFW] = src2[srcW-1]*128;
         }

From 2b04858a100fd8f4a803d25bfdd97bb77ef9ece9 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Wed, 25 May 2011 21:47:31 +0200
Subject: [PATCH 397/830] swscale: revive SWS_CPU_CAPS until next major bump.

---
 libswscale/swscale.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 3d2a38f9d4..f48b1d6210 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -50,6 +50,9 @@
 #ifndef FF_API_SWS_GETCONTEXT
 #define FF_API_SWS_GETCONTEXT  (LIBSWSCALE_VERSION_MAJOR < 2)
 #endif
+#ifndef FF_API_SWS_CPU_CAPS
+#define FF_API_SWS_CPU_CAPS    (LIBSWSCALE_VERSION_MAJOR < 2)
+#endif
 
 /**
  * Returns the LIBSWSCALE_VERSION_INT constant.
@@ -95,13 +98,18 @@ const char *swscale_license(void);
 #define SWS_ACCURATE_RND      0x40000
 #define SWS_BITEXACT          0x80000
 
-//The following flags are only provided for API/ABI compatibility they have no effect anymore
+#if FF_API_SWS_CPU_CAPS
+/**
+ * CPU caps are autodetected now, those flags
+ * are only provided for API compatibility.
+ */
 #define SWS_CPU_CAPS_MMX      0x80000000
 #define SWS_CPU_CAPS_MMX2     0x20000000
 #define SWS_CPU_CAPS_3DNOW    0x40000000
 #define SWS_CPU_CAPS_ALTIVEC  0x10000000
 #define SWS_CPU_CAPS_BFIN     0x01000000
 #define SWS_CPU_CAPS_SSE2     0x02000000
+#endif
 
 #define SWS_MAX_REDUCE_CUTOFF 0.002
 

From 835ab9207e029ebbf7e77a3ae61f89ad7e9df82d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 15:16:19 -0400
Subject: [PATCH 398/830] swscale: remove if(accurate_rnd) branch from
 functions.

---
 libswscale/x86/swscale_template.c | 58 ++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 43119de6eb..2494160f8c 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -827,11 +827,10 @@
 #define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
 
 
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
 {
-        if (c->flags & SWS_ACCURATE_RND) {
             if (uDest) {
                 YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
                 YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
@@ -841,7 +840,12 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
             }
 
             YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-        } else {
+}
+
+static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
+                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+{
             if (uDest) {
                 YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
                 YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
@@ -851,10 +855,9 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
             }
 
             YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-        }
 }
 
-static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
+static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
 {
         long p= 4;
@@ -862,7 +865,6 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
         uint8_t *dst[4]= {aDest, dest, uDest, vDest};
         x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
 
-        if (c->flags & SWS_ACCURATE_RND) {
             while(p--) {
                 if (dst[p]) {
                     __asm__ volatile(
@@ -873,7 +875,16 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
                     );
                 }
             }
-        } else {
+}
+
+static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+{
+    long p= 4;
+    const uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
+    uint8_t *dst[4]= {aDest, dest, uDest, vDest};
+    x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
+
             while(p--) {
                 if (dst[p]) {
                     __asm__ volatile(
@@ -884,21 +895,19 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
                     );
                 }
             }
-        }
 }
 
 
 /**
  * vertical scale YV12 to RGB
  */
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                        const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
 
-        if (c->flags & SWS_ACCURATE_RND) {
             switch(c->dstFormat) {
             case PIX_FMT_RGB32:
                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
@@ -979,8 +988,20 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                 YSCALEYUV2PACKEDX_END
                 return;
             }
-        } else {
-            switch(c->dstFormat) {
+
+    yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
+                   chrFilter, chrSrc, chrFilterSize,
+                   alpSrc, dest, dstW, dstY);
+}
+
+static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
+        switch(c->dstFormat) {
             case PIX_FMT_RGB32:
                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                     YSCALEYUV2PACKEDX
@@ -1053,7 +1074,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                 YSCALEYUV2PACKEDX_END
                 return;
             }
-        }
 
     yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
                    chrFilter, chrSrc, chrFilterSize,
@@ -2284,11 +2304,17 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     enum PixelFormat srcFormat = c->srcFormat;
 
     if (!(c->flags & SWS_BITEXACT)) {
-        c->yuv2yuv1     = RENAME(yuv2yuv1    );
-        c->yuv2yuvX     = RENAME(yuv2yuvX    );
+        if (c->flags & SWS_ACCURATE_RND) {
+            c->yuv2yuv1     = RENAME(yuv2yuv1_ar    );
+            c->yuv2yuvX     = RENAME(yuv2yuvX_ar    );
+            c->yuv2packedX  = RENAME(yuv2packedX_ar );
+        } else {
+            c->yuv2yuv1     = RENAME(yuv2yuv1    );
+            c->yuv2yuvX     = RENAME(yuv2yuvX    );
+            c->yuv2packedX  = RENAME(yuv2packedX );
+        }
         c->yuv2packed1  = RENAME(yuv2packed1 );
         c->yuv2packed2  = RENAME(yuv2packed2 );
-        c->yuv2packedX  = RENAME(yuv2packedX );
     }
 
     c->hScale       = RENAME(hScale      );

From f2a3b23051c61a38ba5cb04414e6ebe0986041da Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 15:18:40 -0400
Subject: [PATCH 399/830] swscale: remove if(full_chr_int) from yuv2packed1().

If that flag is set, swScale() already proxies the call to
yuv2rgbXinC_full(). Therefore, this flag is never set when
yuv2packed1() is called.
---
 libswscale/swscale_template.c     | 5 -----
 libswscale/x86/swscale_template.c | 5 -----
 2 files changed, 10 deletions(-)

diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 6f034c9d6f..586614f989 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -127,11 +127,6 @@ static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
     const int yalpha= 4096; //FIXME ...
 
-    if (flags&SWS_FULL_CHR_H_INT) {
-        c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
-        return;
-    }
-
     if (uvalpha < 2048) {
         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
     } else {
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 2494160f8c..4fe53bd73b 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1230,11 +1230,6 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
 {
         const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
-        if (flags&SWS_FULL_CHR_H_INT) {
-            c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
-            return;
-        }
-
         if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
             switch(dstFormat) {
             case PIX_FMT_RGB32:

From 264dcc63afdb7c5bd6b5e8c0d8e95dd45c2e9f3e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 16:02:47 -0400
Subject: [PATCH 400/830] swscale: remove if (c->dstFormat) branch from
 yuv2packed[12X]().

This allows cutting up the function in much smaller and easier-
to-maintain chunks.
---
 libswscale/x86/swscale_template.c | 426 ++++++++++++++++++------------
 1 file changed, 262 insertions(+), 164 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 4fe53bd73b..de6ef104a4 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -901,15 +901,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
 /**
  * vertical scale YV12 to RGB
  */
-static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                        const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
 
-            switch(c->dstFormat) {
-            case PIX_FMT_RGB32:
                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                     YSCALEYUV2PACKEDX_ACCURATE
                     YSCALEYUV2RGBX
@@ -932,8 +930,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
 
                     YSCALEYUV2PACKEDX_END
                 }
-                return;
-            case PIX_FMT_BGR24:
+}
+
+static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX_ACCURATE
                 YSCALEYUV2RGBX
                 "pxor %%mm7, %%mm7 \n\t"
@@ -947,8 +952,16 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
                 "r" (dest), "m" (dstW_reg)
                 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
                 );
-                return;
-            case PIX_FMT_RGB555:
+}
+
+
+static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX_ACCURATE
                 YSCALEYUV2RGBX
                 "pxor %%mm7, %%mm7 \n\t"
@@ -961,8 +974,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
 
                 WRITERGB15(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_RGB565:
+}
+
+static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX_ACCURATE
                 YSCALEYUV2RGBX
                 "pxor %%mm7, %%mm7 \n\t"
@@ -975,8 +995,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
 
                 WRITERGB16(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_YUYV422:
+}
+
+static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX_ACCURATE
                 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 
@@ -986,23 +1013,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
                 "psraw $3, %%mm7    \n\t"
                 WRITEYUY2(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            }
-
-    yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                   chrFilter, chrSrc, chrFilterSize,
-                   alpSrc, dest, dstW, dstY);
 }
 
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                        const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
 
-        switch(c->dstFormat) {
-            case PIX_FMT_RGB32:
                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                     YSCALEYUV2PACKEDX
                     YSCALEYUV2RGBX
@@ -1019,8 +1038,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                     WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                     YSCALEYUV2PACKEDX_END
                 }
-                return;
-            case PIX_FMT_BGR24:
+}
+
+static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX
                 YSCALEYUV2RGBX
                 "pxor                    %%mm7, %%mm7       \n\t"
@@ -1033,8 +1059,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                 "r" (dest),  "m" (dstW_reg)
                 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
                 );
-                return;
-            case PIX_FMT_RGB555:
+}
+
+static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX
                 YSCALEYUV2RGBX
                 "pxor %%mm7, %%mm7 \n\t"
@@ -1047,8 +1080,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
 
                 WRITERGB15(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_RGB565:
+}
+
+static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX
                 YSCALEYUV2RGBX
                 "pxor %%mm7, %%mm7 \n\t"
@@ -1061,8 +1101,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
 
                 WRITERGB16(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_YUYV422:
+}
+
+static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+    x86_reg dummy=0;
+    x86_reg dstW_reg = dstW;
+
                 YSCALEYUV2PACKEDX
                 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 
@@ -1072,23 +1119,14 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                 "psraw $3, %%mm7    \n\t"
                 WRITEYUY2(%4, %5, %%REGa)
                 YSCALEYUV2PACKEDX_END
-                return;
-            }
-
-    yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                   chrFilter, chrSrc, chrFilterSize,
-                   alpSrc, dest, dstW, dstY);
 }
 
 /**
  * vertical bilinear scale YV12 to RGB
  */
-static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                           const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
 {
-        switch(c->dstFormat) {
-        //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-        case PIX_FMT_RGB32:
             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
                 __asm__ volatile(
@@ -1145,8 +1183,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                     "a" (&c->redDither)
                 );
             }
-            return;
-        case PIX_FMT_BGR24:
+}
+
+static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
@@ -1159,8 +1201,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                 "a" (&c->redDither)
             );
-            return;
-        case PIX_FMT_RGB555:
+}
+
+static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
@@ -1181,8 +1227,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                 "a" (&c->redDither)
             );
-            return;
-        case PIX_FMT_RGB565:
+}
+
+static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
@@ -1202,8 +1252,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                 "a" (&c->redDither)
             );
-            return;
-        case PIX_FMT_YUYV422:
+}
+
+static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov %4, %%"REG_b"                        \n\t"
@@ -1215,24 +1269,17 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
                 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                 "a" (&c->redDither)
             );
-            return;
-        }
-
-    yuv2packed2_c(c, buf0, buf1, uvbuf0, uvbuf1, abuf0, abuf1,
-                  dest, dstW, yalpha, uvalpha, y);
 }
 
 /**
  * YV12 to RGB without scaling or interpolating
  */
-static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                           const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
 {
         const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
         if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-            switch(dstFormat) {
-            case PIX_FMT_RGB32:
                 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                     __asm__ volatile(
                         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1262,8 +1309,45 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                         "a" (&c->redDither)
                     );
                 }
-                return;
-            case PIX_FMT_BGR24:
+        } else {
+                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+                    __asm__ volatile(
+                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                        "mov        %4, %%"REG_b"               \n\t"
+                        "push %%"REG_BP"                        \n\t"
+                        YSCALEYUV2RGB1b(%%REGBP, %5)
+                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
+                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                        "pop %%"REG_BP"                         \n\t"
+                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                        "a" (&c->redDither)
+                    );
+                } else {
+                    __asm__ volatile(
+                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                        "mov        %4, %%"REG_b"               \n\t"
+                        "push %%"REG_BP"                        \n\t"
+                        YSCALEYUV2RGB1b(%%REGBP, %5)
+                        "pcmpeqd %%mm7, %%mm7                   \n\t"
+                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                        "pop %%"REG_BP"                         \n\t"
+                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                        "a" (&c->redDither)
+                    );
+                }
+        }
+}
+
+static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                     "mov        %4, %%"REG_b"               \n\t"
@@ -1277,8 +1361,29 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                     :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                     "a" (&c->redDither)
                 );
-                return;
-            case PIX_FMT_RGB555:
+        } else {
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1b(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+        }
+}
+
+static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                     "mov        %4, %%"REG_b"               \n\t"
@@ -1298,8 +1403,35 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                     :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                     "a" (&c->redDither)
                 );
-                return;
-            case PIX_FMT_RGB565:
+        } else {
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1b(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+        }
+}
+
+static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                     "mov        %4, %%"REG_b"               \n\t"
@@ -1320,8 +1452,36 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                     :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                     "a" (&c->redDither)
                 );
-                return;
-            case PIX_FMT_YUYV422:
+        } else {
+                __asm__ volatile(
+                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                    "mov        %4, %%"REG_b"               \n\t"
+                    "push %%"REG_BP"                        \n\t"
+                    YSCALEYUV2RGB1b(%%REGBP, %5)
+                    "pxor    %%mm7, %%mm7                   \n\t"
+                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+                    "pop %%"REG_BP"                         \n\t"
+                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                    "a" (&c->redDither)
+                );
+        }
+}
+
+static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                     "mov        %4, %%"REG_b"               \n\t"
@@ -1334,100 +1494,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                     :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                     "a" (&c->redDither)
                 );
-                return;
-            }
         } else {
-            switch(dstFormat) {
-            case PIX_FMT_RGB32:
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1b(%%REGBP, %5)
-                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                } else {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1b(%%REGBP, %5)
-                        "pcmpeqd %%mm7, %%mm7                   \n\t"
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                }
-                return;
-            case PIX_FMT_BGR24:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB555:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB565:
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-
-                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_YUYV422:
                 __asm__ volatile(
                     "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                     "mov        %4, %%"REG_b"               \n\t"
@@ -1440,12 +1507,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
                     :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                     "a" (&c->redDither)
                 );
-                return;
-            }
         }
-
-    yuv2packed1_c(c, buf0, uvbuf0, uvbuf1, abuf0, dest,
-                  dstW, uvalpha, dstFormat, flags, y);
 }
 
 //FIXME yuy2* can read up to 7 samples too much
@@ -2302,14 +2364,50 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         if (c->flags & SWS_ACCURATE_RND) {
             c->yuv2yuv1     = RENAME(yuv2yuv1_ar    );
             c->yuv2yuvX     = RENAME(yuv2yuvX_ar    );
-            c->yuv2packedX  = RENAME(yuv2packedX_ar );
+            switch (c->dstFormat) {
+            case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
+            case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
+            case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X_ar);  break;
+            case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X_ar);  break;
+            case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
+            default: break;
+            }
         } else {
             c->yuv2yuv1     = RENAME(yuv2yuv1    );
             c->yuv2yuvX     = RENAME(yuv2yuvX    );
-            c->yuv2packedX  = RENAME(yuv2packedX );
+            switch (c->dstFormat) {
+            case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
+            case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
+            case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X);  break;
+            case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X);  break;
+            case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
+            default: break;
+            }
+        }
+        switch (c->dstFormat) {
+        case PIX_FMT_RGB32:
+                c->yuv2packed1 = RENAME(yuv2rgb32_1);
+                c->yuv2packed2 = RENAME(yuv2rgb32_2);
+                break;
+        case PIX_FMT_BGR24:
+                c->yuv2packed1 = RENAME(yuv2bgr24_1);
+                c->yuv2packed2 = RENAME(yuv2bgr24_2);
+                break;
+        case PIX_FMT_RGB555:
+                c->yuv2packed1 = RENAME(yuv2rgb555_1);
+                c->yuv2packed2 = RENAME(yuv2rgb555_2);
+                break;
+        case PIX_FMT_RGB565:
+                c->yuv2packed1 = RENAME(yuv2rgb565_1);
+                c->yuv2packed2 = RENAME(yuv2rgb565_2);
+                break;
+        case PIX_FMT_YUYV422:
+                c->yuv2packed1 = RENAME(yuv2yuyv422_1);
+                c->yuv2packed2 = RENAME(yuv2yuyv422_2);
+                break;
+        default:
+                break;
         }
-        c->yuv2packed1  = RENAME(yuv2packed1 );
-        c->yuv2packed2  = RENAME(yuv2packed2 );
     }
 
     c->hScale       = RENAME(hScale      );

From 1dbf40c38392eab5b893aadcfbb48b589ff1471f Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 16:45:41 -0400
Subject: [PATCH 401/830] swscale: remove duplicate mmx/mmx2 functions if they
 are identical.

---
 libswscale/x86/swscale_template.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index de6ef104a4..72504e63fa 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1510,6 +1510,7 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, co
         }
 }
 
+#if !COMPILE_TEMPLATE_MMX2
 //FIXME yuy2* can read up to 7 samples too much
 
 static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
@@ -1691,6 +1692,7 @@ static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
 {
     RENAME(nvXXtoUV)(dstV, dstU, src1, width);
 }
+#endif /* !COMPILE_TEMPLATE_MMX2 */
 
 static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
 {
@@ -1822,7 +1824,7 @@ static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
 }
 
-
+#if !COMPILE_TEMPLATE_MMX2
 // bilinear / bicubic scaling
 static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
                                   const int16_t *filter, const int16_t *filterPos, long filterSize)
@@ -1977,6 +1979,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
         );
     }
 }
+#endif /* !COMPILE_TEMPLATE_MMX2 */
 
 static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
                                     const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
@@ -2410,7 +2413,9 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         }
     }
 
+#if !COMPILE_TEMPLATE_MMX2
     c->hScale       = RENAME(hScale      );
+#endif /* !COMPILE_TEMPLATE_MMX2 */
 
     // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
 #if COMPILE_TEMPLATE_MMX2
@@ -2426,7 +2431,8 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     }
 #endif /* COMPILE_TEMPLATE_MMX2 */
 
-     switch(srcFormat) {
+#if !COMPILE_TEMPLATE_MMX2
+    switch(srcFormat) {
         case PIX_FMT_YUYV422  : c->chrToYV12 = RENAME(yuy2ToUV); break;
         case PIX_FMT_UYVY422  : c->chrToYV12 = RENAME(uyvyToUV); break;
         case PIX_FMT_NV12     : c->chrToYV12 = RENAME(nv12ToUV); break;
@@ -2439,6 +2445,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         case PIX_FMT_YUV422P16LE:
         case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break;
     }
+#endif /* !COMPILE_TEMPLATE_MMX2 */
     if (!c->chrSrcHSubSample) {
         switch(srcFormat) {
         case PIX_FMT_BGR24  : c->chrToYV12 = RENAME(bgr24ToUV); break;
@@ -2448,21 +2455,25 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
     }
 
     switch (srcFormat) {
+#if !COMPILE_TEMPLATE_MMX2
     case PIX_FMT_YUYV422  :
     case PIX_FMT_Y400A    :
                             c->lumToYV12 = RENAME(yuy2ToY); break;
     case PIX_FMT_UYVY422  :
                             c->lumToYV12 = RENAME(uyvyToY); break;
+#endif /* !COMPILE_TEMPLATE_MMX2 */
     case PIX_FMT_BGR24    : c->lumToYV12 = RENAME(bgr24ToY); break;
     case PIX_FMT_RGB24    : c->lumToYV12 = RENAME(rgb24ToY); break;
     default: break;
     }
+#if !COMPILE_TEMPLATE_MMX2
     if (c->alpPixBuf) {
         switch (srcFormat) {
         case PIX_FMT_Y400A  : c->alpToYV12 = RENAME(yuy2ToY); break;
         default: break;
         }
     }
+#endif /* !COMPILE_TEMPLATE_MMX2 */
 
     if(isAnyRGB(c->srcFormat))
         c->hScale16= RENAME(hScale16);

From c3f07903ec47d2fa6ee59a1129df3f25ad22e6eb Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 26 May 2011 09:11:29 -0400
Subject: [PATCH 402/830] swscale: reformat x86/swscale_template.c.

Interleave macros and code so that it's easier to find the
actual code that belongs to a function. Also reindent where
appropriate and remove dead code.
---
 libswscale/x86/swscale_template.c | 1761 +++++++++++++++--------------
 1 file changed, 889 insertions(+), 872 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 72504e63fa..a6abb9e805 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -73,6 +73,24 @@
         : "%"REG_a, "%"REG_d, "%"REG_S\
     );
 
+static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrSrc,
+                                    int chrFilterSize, const int16_t **alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                    uint8_t *aDest, long dstW, long chrDstW)
+{
+    if (uDest) {
+        YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
+        YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+    }
+    if (CONFIG_SWSCALE_ALPHA && aDest) {
+        YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
+    }
+
+    YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+}
+
 #define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
     __asm__ volatile(\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
@@ -135,6 +153,24 @@
         : "%"REG_a, "%"REG_d, "%"REG_S\
     );
 
+static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
+                                       const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       int chrFilterSize, const int16_t **alpSrc,
+                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                       uint8_t *aDest, long dstW, long chrDstW)
+{
+    if (uDest) {
+        YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
+        YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+    }
+    if (CONFIG_SWSCALE_ALPHA && aDest) {
+        YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
+    }
+
+    YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+}
+
 #define YSCALEYUV2YV121 \
     "mov %2, %%"REG_a"                    \n\t"\
     ".p2align               4             \n\t" /* FIXME Unroll? */\
@@ -148,6 +184,28 @@
     "add                   $8, %%"REG_a"  \n\t"\
     "jnc                   1b             \n\t"
 
+static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
+                                    const int16_t *chrSrc, const int16_t *alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                    uint8_t *aDest, long dstW, long chrDstW)
+{
+    long p= 4;
+    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW };
+    uint8_t *dst[4]= { aDest, dest, uDest, vDest };
+    x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+
+    while (p--) {
+        if (dst[p]) {
+            __asm__ volatile(
+               YSCALEYUV2YV121
+               :: "r" (src[p]), "r" (dst[p] + counter[p]),
+                  "g" (-counter[p])
+               : "%"REG_a
+            );
+        }
+    }
+}
+
 #define YSCALEYUV2YV121_ACCURATE \
     "mov %2, %%"REG_a"                    \n\t"\
     "pcmpeqw %%mm7, %%mm7                 \n\t"\
@@ -166,13 +224,28 @@
     "add                   $8, %%"REG_a"  \n\t"\
     "jnc                   1b             \n\t"
 
-/*
-    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
-       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
-       "r" (dest), "m" (dstW_reg),
-       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
-    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
-*/
+static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
+                                       const int16_t *chrSrc, const int16_t *alpSrc,
+                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                       uint8_t *aDest, long dstW, long chrDstW)
+{
+    long p= 4;
+    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW };
+    uint8_t *dst[4]= { aDest, dest, uDest, vDest };
+    x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+
+    while (p--) {
+        if (dst[p]) {
+            __asm__ volatile(
+                YSCALEYUV2YV121_ACCURATE
+                :: "r" (src[p]), "r" (dst[p] + counter[p]),
+                   "g" (-counter[p])
+                : "%"REG_a
+            );
+        }
+    }
+}
+
 #define YSCALEYUV2PACKEDX_UV \
     __asm__ volatile(\
         "xor                   %%"REG_a", %%"REG_a"     \n\t"\
@@ -362,263 +435,6 @@
     "packuswb        %%mm6, %%mm5       \n\t"\
     "packuswb        %%mm3, %%mm4       \n\t"\
 
-#define REAL_YSCALEYUV2PACKED(index, c) \
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
-    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
-    "psraw                $3, %%mm0                           \n\t"\
-    "psraw                $3, %%mm1                           \n\t"\
-    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
-    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
-    "xor            "#index", "#index"                        \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
-    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
-    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
-    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
-    "psraw                $7, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $7, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
-    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
-    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
-    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
-    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "psraw                $7, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $7, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-
-#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
-
-#define REAL_YSCALEYUV2RGB_UV(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
-    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
-    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
-    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
-    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-
-#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
-    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
-    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
-    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
-    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
-    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-
-#define REAL_YSCALEYUV2RGB_COEFF(c) \
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
-
-#define YSCALEYUV2RGB(index, c) \
-    REAL_YSCALEYUV2RGB_UV(index, c) \
-    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
-    REAL_YSCALEYUV2RGB_COEFF(c)
-
-#define REAL_YSCALEYUV2PACKED1(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "psraw                $7, %%mm3     \n\t" \
-    "psraw                $7, %%mm4     \n\t" \
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $7, %%mm1     \n\t" \
-    "psraw                $7, %%mm7     \n\t" \
-
-#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
-
-#define REAL_YSCALEYUV2RGB1(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
-
-#define REAL_YSCALEYUV2PACKED1b(index, c) \
-    "xor "#index", "#index"             \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
-    "psrlw                $8, %%mm3     \n\t" \
-    "psrlw                $8, %%mm4     \n\t" \
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $7, %%mm1     \n\t" \
-    "psraw                $7, %%mm7     \n\t"
-#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
-
-// do vertical chrominance interpolation
-#define REAL_YSCALEYUV2RGB1b(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ".p2align              4            \n\t"\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
-    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
-    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
-
-#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
-    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
-    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
-    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
-    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
-    "packuswb          %%mm1, %%mm7     \n\t"
-#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
-
 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
     "movq       "#b", "#q2"     \n\t" /* B */\
     "movq       "#r", "#t"      \n\t" /* R */\
@@ -643,6 +459,64 @@
     " jb      1b                \n\t"
 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
 
+static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                          const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc,
+                                          int chrFilterSize, const int16_t **alpSrc,
+                                          uint8_t *dest, long dstW, long dstY)
+{ /* BGR32 writer built on YSCALEYUV2PACKEDX_ACCURATE; alpha is either filtered (first branch) or all ones. */
+    x86_reg dummy=0; /* NOTE(review): dummy/dstW_reg are not referenced in the visible body; presumably used by the operand list in YSCALEYUV2PACKEDX_END -- confirm */
+    x86_reg dstW_reg = dstW;
+
+    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { /* alpha path: additionally filters with ALP_MMX_FILTER_OFFSET */
+        YSCALEYUV2PACKEDX_ACCURATE
+        YSCALEYUV2RGBX
+        "movq                      %%mm2, "U_TEMP"(%0)  \n\t" /* spill mm2, mm4, mm5 to the U_TEMP/V_TEMP/Y_TEMP slots at (%0) */
+        "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
+        "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
+        YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
+        "movq               "Y_TEMP"(%0), %%mm5         \n\t" /* reload the mm5 value spilled above */
+        "psraw                        $3, %%mm1         \n\t"
+        "psraw                        $3, %%mm7         \n\t"
+        "packuswb                  %%mm7, %%mm1         \n\t" /* mm1 = words of mm1/mm7 packed to unsigned-saturated bytes */
+        WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) /* b=mm3, g=mm4, r=mm5, a=mm1; NOTE(review): b is mm3 here, presumably reloaded from U_TEMP inside the _YA macro -- confirm */
+        YSCALEYUV2PACKEDX_END
+    } else {
+        YSCALEYUV2PACKEDX_ACCURATE
+        YSCALEYUV2RGBX
+        "pcmpeqd %%mm7, %%mm7 \n\t" /* mm7 = all ones, passed as the alpha argument below */
+        WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) /* b=mm2, g=mm4, r=mm5, a=mm7 */
+        YSCALEYUV2PACKEDX_END
+    }
+}
+
+static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
+                                       const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       int chrFilterSize, const int16_t **alpSrc,
+                                       uint8_t *dest, long dstW, long dstY)
+{ /* Same shape as the _ar variant, but built on YSCALEYUV2PACKEDX instead of YSCALEYUV2PACKEDX_ACCURATE. */
+    x86_reg dummy=0; /* NOTE(review): dummy/dstW_reg are not referenced in the visible body; presumably used by the operand list in YSCALEYUV2PACKEDX_END -- confirm */
+    x86_reg dstW_reg = dstW;
+
+    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { /* alpha path: additionally filters with ALP_MMX_FILTER_OFFSET */
+        YSCALEYUV2PACKEDX
+        YSCALEYUV2RGBX
+        YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) /* given mm0, mm3, mm6, mm1, mm7; mm2/mm4/mm5 are not in the list, and no spill is done here */
+        "psraw                        $3, %%mm1         \n\t"
+        "psraw                        $3, %%mm7         \n\t"
+        "packuswb                  %%mm7, %%mm1         \n\t" /* mm1 = words of mm1/mm7 packed to unsigned-saturated bytes */
+        WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) /* b=mm2, g=mm4, r=mm5, a=mm1 */
+        YSCALEYUV2PACKEDX_END
+    } else {
+        YSCALEYUV2PACKEDX
+        YSCALEYUV2RGBX
+        "pcmpeqd %%mm7, %%mm7 \n\t" /* mm7 = all ones, passed as the alpha argument below */
+        WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) /* b=mm2, g=mm4, r=mm5, a=mm7 */
+        YSCALEYUV2PACKEDX_END
+    }
+}
+
 #define REAL_WRITERGB16(dst, dstw, index) \
     "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
     "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
@@ -671,6 +545,50 @@
     " jb             1b             \n\t"
 #define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
 
+static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                           const int16_t **lumSrc, int lumFilterSize,
+                                           const int16_t *chrFilter, const int16_t **chrSrc,
+                                           int chrFilterSize, const int16_t **alpSrc,
+                                           uint8_t *dest, long dstW, long dstY)
+{ /* RGB565 writer: YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX, optional dither, then WRITERGB16. */
+    x86_reg dummy=0; /* NOTE(review): dummy/dstW_reg are not referenced in the visible body; presumably used by the operand list in YSCALEYUV2PACKEDX_END -- confirm */
+    x86_reg dstW_reg = dstW;
+
+    YSCALEYUV2PACKEDX_ACCURATE
+    YSCALEYUV2RGBX
+    "pxor %%mm7, %%mm7 \n\t"
+    /* at this point: mm2=B, mm4=G, mm5=R, mm7=0 */
+#ifdef DITHER1XBPP
+    "paddusb "BLUE_DITHER"(%0), %%mm2\n\t" /* unsigned-saturating byte add of the per-channel dither stored at (%0) */
+    "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
+    "paddusb "RED_DITHER"(%0), %%mm5\n\t"
+#endif
+    WRITERGB16(%4, %5, %%REGa) /* dst=%4, dstw=%5, index=REG_a */
+    YSCALEYUV2PACKEDX_END
+}
+
+static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
+                                        const int16_t **lumSrc, int lumFilterSize,
+                                        const int16_t *chrFilter, const int16_t **chrSrc,
+                                        int chrFilterSize, const int16_t **alpSrc,
+                                        uint8_t *dest, long dstW, long dstY)
+{ /* RGB565 writer: YSCALEYUV2PACKEDX + YSCALEYUV2RGBX, optional dither, then WRITERGB16. */
+    x86_reg dummy=0; /* NOTE(review): dummy/dstW_reg are not referenced in the visible body; presumably used by the operand list in YSCALEYUV2PACKEDX_END -- confirm */
+    x86_reg dstW_reg = dstW;
+
+    YSCALEYUV2PACKEDX
+    YSCALEYUV2RGBX
+    "pxor %%mm7, %%mm7 \n\t"
+    /* at this point: mm2=B, mm4=G, mm5=R, mm7=0 */
+#ifdef DITHER1XBPP
+    "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t" /* unsigned-saturating byte add of the per-channel dither stored at (%0) */
+    "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
+    "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
+#endif
+    WRITERGB16(%4, %5, %%REGa) /* dst=%4, dstw=%5, index=REG_a */
+    YSCALEYUV2PACKEDX_END
+}
+
 #define REAL_WRITERGB15(dst, dstw, index) \
     "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
     "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
@@ -700,6 +618,50 @@
     " jb             1b             \n\t"
 #define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
 
+static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                           const int16_t **lumSrc, int lumFilterSize,
+                                           const int16_t *chrFilter, const int16_t **chrSrc,
+                                           int chrFilterSize, const int16_t **alpSrc,
+                                           uint8_t *dest, long dstW, long dstY)
+{ /* RGB555 writer: YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX, optional dither, then WRITERGB15. */
+    x86_reg dummy=0; /* NOTE(review): dummy/dstW_reg are not referenced in the visible body; presumably used by the operand list in YSCALEYUV2PACKEDX_END -- confirm */
+    x86_reg dstW_reg = dstW;
+
+    YSCALEYUV2PACKEDX_ACCURATE
+    YSCALEYUV2RGBX
+    "pxor %%mm7, %%mm7 \n\t"
+    /* at this point: mm2=B, mm4=G, mm5=R, mm7=0 */
+#ifdef DITHER1XBPP
+    "paddusb "BLUE_DITHER"(%0), %%mm2\n\t" /* unsigned-saturating byte add of the per-channel dither stored at (%0) */
+    "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
+    "paddusb "RED_DITHER"(%0), %%mm5\n\t"
+#endif
+    WRITERGB15(%4, %5, %%REGa) /* dst=%4, dstw=%5, index=REG_a */
+    YSCALEYUV2PACKEDX_END
+}
+
+static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
+                                        const int16_t **lumSrc, int lumFilterSize,
+                                        const int16_t *chrFilter, const int16_t **chrSrc,
+                                        int chrFilterSize, const int16_t **alpSrc,
+                                        uint8_t *dest, long dstW, long dstY)
+{ /* RGB555 writer: YSCALEYUV2PACKEDX + YSCALEYUV2RGBX, optional dither, then WRITERGB15. */
+    x86_reg dummy=0; /* NOTE(review): dummy/dstW_reg are not referenced in the visible body; presumably used by the operand list in YSCALEYUV2PACKEDX_END -- confirm */
+    x86_reg dstW_reg = dstW;
+
+    YSCALEYUV2PACKEDX
+    YSCALEYUV2RGBX
+    "pxor %%mm7, %%mm7 \n\t"
+    /* at this point: mm2=B, mm4=G, mm5=R, mm7=0 */
+#ifdef DITHER1XBPP
+    "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t" /* unsigned-saturating byte add of the per-channel dither stored at (%0) */
+    "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
+    "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
+#endif
+    WRITERGB15(%4, %5, %%REGa) /* dst=%4, dstw=%5, index=REG_a */
+    YSCALEYUV2PACKEDX_END
+}
+
 #define WRITEBGR24MMX(dst, dstw, index) \
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
     "movq      %%mm2, %%mm1     \n\t" /* B */\
@@ -809,6 +771,50 @@
 #define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
 #endif
 
+static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                          const int16_t **lumSrc, int lumFilterSize,
+                                          const int16_t *chrFilter, const int16_t **chrSrc,
+                                          int chrFilterSize, const int16_t **alpSrc,
+                                          uint8_t *dest, long dstW, long dstY)
+{ /* BGR24 writer on YSCALEYUV2PACKEDX_ACCURATE; supplies its own operand list instead of YSCALEYUV2PACKEDX_END. */
+    x86_reg dummy=0; /* bound to the %1-%3 operand slots below */
+    x86_reg dstW_reg = dstW; /* x86_reg copy of dstW, bound to %5 below */
+
+    YSCALEYUV2PACKEDX_ACCURATE
+    YSCALEYUV2RGBX
+    "pxor %%mm7, %%mm7 \n\t"
+    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize; REG_c = 3 * REG_a
+    "add %4, %%"REG_c"                        \n\t" /* REG_c = dest + 3 * REG_a */
+    WRITEBGR24(%%REGc, %5, %%REGa) /* dst=REG_c, dstw=%5, index=REG_a */
+    :: "r" (&c->redDither), /* %0 */
+       "m" (dummy), "m" (dummy), "m" (dummy), /* %1-%3: dummy operands; they keep dest at %4 and dstW_reg at %5 */
+       "r" (dest), "m" (dstW_reg) /* %4, %5 */
+    : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+    );
+}
+
+static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
+                                       const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       int chrFilterSize, const int16_t **alpSrc,
+                                       uint8_t *dest, long dstW, long dstY)
+{ /* BGR24 writer on YSCALEYUV2PACKEDX; supplies its own operand list instead of YSCALEYUV2PACKEDX_END. */
+    x86_reg dummy=0; /* bound to the %1-%3 operand slots below */
+    x86_reg dstW_reg = dstW; /* x86_reg copy of dstW, bound to %5 below */
+
+    YSCALEYUV2PACKEDX
+    YSCALEYUV2RGBX
+    "pxor                    %%mm7, %%mm7       \n\t"
+    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize; REG_c = 3 * REG_a
+    "add                        %4, %%"REG_c"   \n\t" /* REG_c = dest + 3 * REG_a */
+    WRITEBGR24(%%REGc, %5, %%REGa) /* dst=REG_c, dstw=%5, index=REG_a */
+    :: "r" (&c->redDither), /* %0 */
+       "m" (dummy), "m" (dummy), "m" (dummy), /* %1-%3: dummy operands; they keep dest at %4 and dstW_reg at %5 */
+       "r" (dest),  "m" (dstW_reg) /* %4, %5 */
+    : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+    );
+}
+
 #define REAL_WRITEYUY2(dst, dstw, index) \
     "packuswb  %%mm3, %%mm3     \n\t"\
     "packuswb  %%mm4, %%mm4     \n\t"\
@@ -827,687 +833,698 @@
 #define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
 
 
-static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
-{
-            if (uDest) {
-                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
-            }
-            if (CONFIG_SWSCALE_ALPHA && aDest) {
-                YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
-            }
-
-            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-}
-
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
-{
-            if (uDest) {
-                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
-            }
-            if (CONFIG_SWSCALE_ALPHA && aDest) {
-                YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
-            }
-
-            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-}
-
-static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
-{
-        long p= 4;
-        const int16_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
-        uint8_t *dst[4]= {aDest, dest, uDest, vDest};
-        x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
-
-            while(p--) {
-                if (dst[p]) {
-                    __asm__ volatile(
-                        YSCALEYUV2YV121_ACCURATE
-                        :: "r" (src[p]), "r" (dst[p] + counter[p]),
-                        "g" (-counter[p])
-                        : "%"REG_a
-                    );
-                }
-            }
-}
-
-static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
-{
-    long p= 4;
-    const uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
-    uint8_t *dst[4]= {aDest, dest, uDest, vDest};
-    x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
-
-            while(p--) {
-                if (dst[p]) {
-                    __asm__ volatile(
-                        YSCALEYUV2YV121
-                        :: "r" (src[p]), "r" (dst[p] + counter[p]),
-                        "g" (-counter[p])
-                        : "%"REG_a
-                    );
-                }
-            }
-}
-
-
-/**
- * vertical scale YV12 to RGB
- */
-static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                            const int16_t **lumSrc, int lumFilterSize,
+                                            const int16_t *chrFilter, const int16_t **chrSrc,
+                                            int chrFilterSize, const int16_t **alpSrc,
+                                            uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
 
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    YSCALEYUV2PACKEDX_ACCURATE
-                    YSCALEYUV2RGBX
-                    "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
-                    "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
-                    "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
-                    YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
-                    "movq               "Y_TEMP"(%0), %%mm5         \n\t"
-                    "psraw                        $3, %%mm1         \n\t"
-                    "psraw                        $3, %%mm7         \n\t"
-                    "packuswb                  %%mm7, %%mm1         \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
-
-                    YSCALEYUV2PACKEDX_END
-                } else {
-                    YSCALEYUV2PACKEDX_ACCURATE
-                    YSCALEYUV2RGBX
-                    "pcmpeqd %%mm7, %%mm7 \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-
-                    YSCALEYUV2PACKEDX_END
-                }
+    YSCALEYUV2PACKEDX_ACCURATE
+    /* YSCALEYUV2RGBX is not invoked here, so there is no B/G/R: mm3, mm4, mm1 and mm7 are shifted right by 3 and handed to WRITEYUY2 (presumably mm3=U, mm4=V, mm1/mm7=Y -- confirm against YSCALEYUV2PACKEDX_ACCURATE) */
+    "psraw $3, %%mm3    \n\t"
+    "psraw $3, %%mm4    \n\t"
+    "psraw $3, %%mm1    \n\t"
+    "psraw $3, %%mm7    \n\t"
+    WRITEYUY2(%4, %5, %%REGa) /* dst=%4, dstw=%5, index=REG_a */
+    YSCALEYUV2PACKEDX_END
 }
 
-static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
+                                         const int16_t **lumSrc, int lumFilterSize,
+                                         const int16_t *chrFilter, const int16_t **chrSrc,
+                                         int chrFilterSize, const int16_t **alpSrc,
+                                         uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
 
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
-                "add %4, %%"REG_c"                        \n\t"
-                WRITEBGR24(%%REGc, %5, %%REGa)
-
-
-                :: "r" (&c->redDither),
-                "m" (dummy), "m" (dummy), "m" (dummy),
-                "r" (dest), "m" (dstW_reg)
-                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
-                );
+    YSCALEYUV2PACKEDX
+    /* YSCALEYUV2RGBX is not invoked here, so there is no B/G/R: mm3, mm4, mm1 and mm7 are shifted right by 3 and handed to WRITEYUY2 (presumably mm3=U, mm4=V, mm1/mm7=Y -- confirm against YSCALEYUV2PACKEDX) */
+    "psraw $3, %%mm3    \n\t"
+    "psraw $3, %%mm4    \n\t"
+    "psraw $3, %%mm1    \n\t"
+    "psraw $3, %%mm7    \n\t"
+    WRITEYUY2(%4, %5, %%REGa) /* dst=%4, dstw=%5, index=REG_a */
+    YSCALEYUV2PACKEDX_END
 }
 
+#define REAL_YSCALEYUV2RGB_UV(index, c) /* blends the chroma rows at %2 and %3, removes the U/V offsets, starts the green terms */ \
+    "xor            "#index", "#index"  \n\t" /* index = 0 */\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[index]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[index]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[index+VOF]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[index+VOF]*/\
+    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[index] - uvbuf1[index]*/\
+    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[index+VOF] - uvbuf1[index+VOF]*/\
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t" /* mm0 = the weight called uvalpha1 below */\
+    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[index] - uvbuf1[index])uvalpha1>>16*/\
+    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[index+VOF] - uvbuf1[index+VOF])uvalpha1>>16*/\
+    "psraw                $4, %%mm3     \n\t" /* uvbuf1[index] >>4*/\
+    "psraw                $4, %%mm4     \n\t" /* uvbuf1[index+VOF] >>4*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* (uvbuf1[index]>>4) + ((uvbuf0[index] - uvbuf1[index])uvalpha1>>16)*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* (uvbuf1[index+VOF]>>4) + ((uvbuf0[index+VOF] - uvbuf1[index+VOF])uvalpha1>>16)*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
 
-static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
+#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
+    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
+    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
+    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
+    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
+    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
 
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
-#endif
+#define REAL_YSCALEYUV2RGB_COEFF(c) \
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t"\
+    "movq              %%mm2, %%mm0     \n\t"\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t"\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t"\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t"\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t"\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb          %%mm0, %%mm2     \n\t"\
+    "packuswb          %%mm6, %%mm5     \n\t"\
+    "packuswb          %%mm3, %%mm4     \n\t"\
 
-                WRITERGB15(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
+#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 
-static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
-#endif
-
-                WRITERGB16(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
-
-static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                          const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX_ACCURATE
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-
-                "psraw $3, %%mm3    \n\t"
-                "psraw $3, %%mm4    \n\t"
-                "psraw $3, %%mm1    \n\t"
-                "psraw $3, %%mm7    \n\t"
-                WRITEYUY2(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
-
-static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    YSCALEYUV2PACKEDX
-                    YSCALEYUV2RGBX
-                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
-                    "psraw                        $3, %%mm1         \n\t"
-                    "psraw                        $3, %%mm7         \n\t"
-                    "packuswb                  %%mm7, %%mm1         \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-                    YSCALEYUV2PACKEDX_END
-                } else {
-                    YSCALEYUV2PACKEDX
-                    YSCALEYUV2RGBX
-                    "pcmpeqd %%mm7, %%mm7 \n\t"
-                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                    YSCALEYUV2PACKEDX_END
-                }
-}
-
-static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor                    %%mm7, %%mm7       \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
-                "add                        %4, %%"REG_c"   \n\t"
-                WRITEBGR24(%%REGc, %5, %%REGa)
-
-                :: "r" (&c->redDither),
-                "m" (dummy), "m" (dummy), "m" (dummy),
-                "r" (dest),  "m" (dstW_reg)
-                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
-                );
-}
-
-static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
-#endif
-
-                WRITERGB15(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
-
-static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
-#endif
-
-                WRITERGB16(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
-
-static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
-{
-    x86_reg dummy=0;
-    x86_reg dstW_reg = dstW;
-
-                YSCALEYUV2PACKEDX
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-
-                "psraw $3, %%mm3    \n\t"
-                "psraw $3, %%mm4    \n\t"
-                "psraw $3, %%mm1    \n\t"
-                "psraw $3, %%mm7    \n\t"
-                WRITEYUY2(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-}
+#define YSCALEYUV2RGB(index, c) \
+    REAL_YSCALEYUV2RGB_UV(index, c) \
+    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
+    REAL_YSCALEYUV2RGB_COEFF(c)
 
 /**
  * vertical bilinear scale YV12 to RGB
  */
-static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                          const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
+                                       const uint16_t *buf1, const uint16_t *uvbuf0,
+                                       const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                       const uint16_t *abuf1, uint8_t *dest,
+                                       int dstW, int yalpha, int uvalpha, int y)
 {
-            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
-                __asm__ volatile(
-                    YSCALEYUV2RGB(%%r8, %5)
-                    YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
-                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "packuswb            %%mm7, %%mm1       \n\t"
-                    WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
-                    "a" (&c->redDither)
-                    ,"r" (abuf0), "r" (abuf1)
-                    : "%r8"
-                );
+        __asm__ volatile(
+            YSCALEYUV2RGB(%%r8, %5)
+            YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
+            "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
+            "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
+            "packuswb            %%mm7, %%mm1       \n\t"
+            WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
+               "a" (&c->redDither),
+               "r" (abuf0), "r" (abuf1)
+            : "%r8"
+        );
 #else
-                c->u_temp=(intptr_t)abuf0;
-                c->v_temp=(intptr_t)abuf1;
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB(%%REGBP, %5)
-                    "push                   %0              \n\t"
-                    "push                   %1              \n\t"
-                    "mov          "U_TEMP"(%5), %0          \n\t"
-                    "mov          "V_TEMP"(%5), %1          \n\t"
-                    YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
-                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
-                    "packuswb            %%mm7, %%mm1       \n\t"
-                    "pop                    %1              \n\t"
-                    "pop                    %0              \n\t"
-                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
+        c->u_temp=(intptr_t)abuf0;
+        c->v_temp=(intptr_t)abuf1;
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2RGB(%%REGBP, %5)
+            "push                   %0              \n\t"
+            "push                   %1              \n\t"
+            "mov          "U_TEMP"(%5), %0          \n\t"
+            "mov          "V_TEMP"(%5), %1          \n\t"
+            YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
+            "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
+            "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
+            "packuswb            %%mm7, %%mm1       \n\t"
+            "pop                    %1              \n\t"
+            "pop                    %0              \n\t"
+            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
 #endif
-            } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB(%%REGBP, %5)
-                    "pcmpeqd %%mm7, %%mm7                   \n\t"
-                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-            }
+    } else {
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2RGB(%%REGBP, %5)
+            "pcmpeqd %%mm7, %%mm7                   \n\t"
+            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    }
 }
 
-static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
+                                       const uint16_t *buf1, const uint16_t *uvbuf0,
+                                       const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                       const uint16_t *abuf1, uint8_t *dest,
+                                       int dstW, int yalpha, int uvalpha, int y)
 {
-            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
+    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
+    __asm__ volatile(
+        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+        "mov        %4, %%"REG_b"               \n\t"
+        "push %%"REG_BP"                        \n\t"
+        YSCALEYUV2RGB(%%REGBP, %5)
+        "pxor    %%mm7, %%mm7                   \n\t"
+        WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+        "pop %%"REG_BP"                         \n\t"
+        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+           "a" (&c->redDither)
+    );
 }
 
-static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
+                                        const uint16_t *buf1, const uint16_t *uvbuf0,
+                                        const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                        const uint16_t *abuf1, uint8_t *dest,
+                                        int dstW, int yalpha, int uvalpha, int y)
 {
-            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
+    __asm__ volatile(
+        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+        "mov        %4, %%"REG_b"               \n\t"
+        "push %%"REG_BP"                        \n\t"
+        YSCALEYUV2RGB(%%REGBP, %5)
+        "pxor    %%mm7, %%mm7                   \n\t"
+        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+        "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+        "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
 #endif
-
-                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
+        WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+        "pop %%"REG_BP"                         \n\t"
+        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+           "a" (&c->redDither)
+    );
 }
 
-static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
+                                        const uint16_t *buf1, const uint16_t *uvbuf0,
+                                        const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                        const uint16_t *abuf1, uint8_t *dest,
+                                        int dstW, int yalpha, int uvalpha, int y)
 {
-            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
+    __asm__ volatile(
+        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+        "mov        %4, %%"REG_b"               \n\t"
+        "push %%"REG_BP"                        \n\t"
+        YSCALEYUV2RGB(%%REGBP, %5)
+        "pxor    %%mm7, %%mm7                   \n\t"
+        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+        "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+        "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
 #endif
-
-                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
+        WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+        "pop %%"REG_BP"                         \n\t"
+        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+           "a" (&c->redDither)
+    );
 }
 
-static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+#define REAL_YSCALEYUV2PACKED(index, c) \
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
+    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
+    "psraw                $3, %%mm0                           \n\t"\
+    "psraw                $3, %%mm1                           \n\t"\
+    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
+    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
+    "xor            "#index", "#index"                        \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
+    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
+    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
+    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
+    "psraw                $7, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
+    "psraw                $7, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
+    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
+    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
+    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
+    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "psraw                $7, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "psraw                $7, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+
+#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
+
+static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
+                                         const uint16_t *buf1, const uint16_t *uvbuf0,
+                                         const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                         const uint16_t *abuf1, uint8_t *dest,
+                                         int dstW, int yalpha, int uvalpha, int y)
 {
-            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-            __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov %4, %%"REG_b"                        \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2PACKED(%%REGBP, %5)
-                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-            );
+    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
+    __asm__ volatile(
+        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+        "mov %4, %%"REG_b"                        \n\t"
+        "push %%"REG_BP"                        \n\t"
+        YSCALEYUV2PACKED(%%REGBP, %5)
+        WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+        "pop %%"REG_BP"                         \n\t"
+        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+           "a" (&c->redDither)
+    );
 }
 
+#define REAL_YSCALEYUV2RGB1(index, c) \
+    "xor            "#index", "#index"  \n\t"\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
+    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
+    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t"\
+    "movq              %%mm2, %%mm0     \n\t"\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t"\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t"\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t"\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t"\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb          %%mm0, %%mm2     \n\t"\
+    "packuswb          %%mm6, %%mm5     \n\t"\
+    "packuswb          %%mm3, %%mm4     \n\t"\
+
+#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
+
+// do vertical chrominance interpolation: chroma is the sum uvbuf0 + uvbuf1 (shifted right by 5), luma is read from buf0 only
+#define REAL_YSCALEYUV2RGB1b(index, c) \
+    "xor            "#index", "#index"  \n\t" /* index = 0 */\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t" /* local label 1; no branch to it appears in this macro */\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0 at byte offset eax + VOF */\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1 at byte offset eax + VOF */\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* same sum for the words at byte offset eax + VOF */\
+    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
+    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[eax  ] (4 words) */\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[eax+4] (4 words) */\
+    "psraw                $4, %%mm1     \n\t" /* buf0[eax  ] >>4 */\
+    "psraw                $4, %%mm7     \n\t" /* buf0[eax+4] >>4 */\
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t" /* mm4 = ug + vg */\
+    "movq              %%mm2, %%mm0     \n\t"\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t" /* duplicate each of the two low chroma words */\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t"\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t" /* duplicate each of the two high chroma words */\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t"\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb          %%mm0, %%mm2     \n\t" /* pack words to bytes with unsigned saturation */\
+    "packuswb          %%mm6, %%mm5     \n\t"\
+    "packuswb          %%mm3, %%mm4     \n\t"\
+
+#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c) /* extra expansion level so arguments are macro-expanded before being stringified */
+
+#define REAL_YSCALEYUV2RGB1_ALPHA(index) /* load 8 words from operand %1, shift each right by 7, pack them to bytes in mm7 */ \
+    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
+    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
+    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
+    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
+    "packuswb          %%mm1, %%mm7     \n\t" /* mm7 = 8 bytes (unsigned saturation); mm1 is clobbered */
+#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index) /* extra expansion level so index is macro-expanded before being stringified */
+
 /**
  * YV12 to RGB without scaling or interpolating
  */
-static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                          const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
+                                       const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest,
+                                       int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                       int flags, int y)
 {
-        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1; passed as asm operand %1 ("d") in the branches that do not load abuf0
 
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1(%%REGBP, %5)
-                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                } else {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1(%%REGBP, %5)
-                        "pcmpeqd %%mm7, %%mm7                   \n\t"
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                }
+    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* spill REG_b to ESP_OFFSET(%5); reloaded by the last instruction */
+                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+                "push %%"REG_BP"                        \n\t" /* REG_BP is used as the index register below */
+                YSCALEYUV2RGB1(%%REGBP, %5)
+                YSCALEYUV2RGB1_ALPHA(%%REGBP)
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                   "a" (&c->redDither)
+            );
         } else {
-                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1b(%%REGBP, %5)
-                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                } else {
-                    __asm__ volatile(
-                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                        "mov        %4, %%"REG_b"               \n\t"
-                        "push %%"REG_BP"                        \n\t"
-                        YSCALEYUV2RGB1b(%%REGBP, %5)
-                        "pcmpeqd %%mm7, %%mm7                   \n\t"
-                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                        "pop %%"REG_BP"                         \n\t"
-                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                        "a" (&c->redDither)
-                    );
-                }
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1(%%REGBP, %5)
+                "pcmpeqd %%mm7, %%mm7                   \n\t" /* mm7 = all ones; takes the place of the alpha bytes YSCALEYUV2RGB1_ALPHA would load */
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                   "a" (&c->redDither)
+            );
         }
+    } else { // uvalpha >= 2048: YSCALEYUV2RGB1b sums the chroma of uvbuf0 and uvbuf1
+        if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1b(%%REGBP, %5)
+                YSCALEYUV2RGB1_ALPHA(%%REGBP)
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                   "a" (&c->redDither)
+            );
+        } else {
+            __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1b(%%REGBP, %5)
+                "pcmpeqd %%mm7, %%mm7                   \n\t" /* mm7 = all ones; takes the place of the alpha bytes YSCALEYUV2RGB1_ALPHA would load */
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                   "a" (&c->redDither)
+            );
+        }
+    }
 }
 
-static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
+                                       const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *abuf0, uint8_t *dest,
+                                       int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                       int flags, int y)
 {
-        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1; passed as asm operand %1 ("d")
 
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        }
+    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* spill REG_b to ESP_OFFSET(%5); reloaded by the last instruction */
+            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+            "push %%"REG_BP"                        \n\t" /* REG_BP is used as the index register below */
+            YSCALEYUV2RGB1(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
+            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    } else { // uvalpha >= 2048: YSCALEYUV2RGB1b sums the chroma of uvbuf0 and uvbuf1
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2RGB1b(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
+            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    }
 }
 
-static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
+                                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                        const uint16_t *abuf0, uint8_t *dest,
+                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                        int flags, int y)
 {
-        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1; passed as asm operand %1 ("d")
 
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* spill REG_b to ESP_OFFSET(%5); reloaded by the last instruction */
+            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+            "push %%"REG_BP"                        \n\t" /* REG_BP is used as the index register below */
+            YSCALEYUV2RGB1(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t"
+            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" /* B += value at BLUE_DITHER(%5), unsigned saturating byte add */
+            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t" /* G += value at GREEN_DITHER(%5) */
+            "paddusb "RED_DITHER"(%5), %%mm5      \n\t" /* R += value at RED_DITHER(%5) */
 #endif
-                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+            WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    } else { // uvalpha >= 2048: YSCALEYUV2RGB1b sums the chroma of uvbuf0 and uvbuf1
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2RGB1b(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t"
+            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" /* B += value at BLUE_DITHER(%5), unsigned saturating byte add */
+            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t" /* G += value at GREEN_DITHER(%5) */
+            "paddusb "RED_DITHER"(%5), %%mm5      \n\t" /* R += value at RED_DITHER(%5) */
 #endif
-                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        }
+            WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    }
 }
 
-static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
+                                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                        const uint16_t *abuf0, uint8_t *dest,
+                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                        int flags, int y)
 {
-        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1; passed as asm operand %1 ("d")
 
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* spill REG_b to ESP_OFFSET(%5); reloaded by the last instruction */
+            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+            "push %%"REG_BP"                        \n\t" /* REG_BP is used as the index register below */
+            YSCALEYUV2RGB1(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t"
+            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" /* B += value at BLUE_DITHER(%5), unsigned saturating byte add */
+            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t" /* G += value at GREEN_DITHER(%5) */
+            "paddusb "RED_DITHER"(%5), %%mm5      \n\t" /* R += value at RED_DITHER(%5) */
 #endif
-
-                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2RGB1b(%%REGBP, %5)
-                    "pxor    %%mm7, %%mm7                   \n\t"
-                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+            WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    } else { // uvalpha >= 2048: YSCALEYUV2RGB1b sums the chroma of uvbuf0 and uvbuf1
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2RGB1b(%%REGBP, %5)
+            "pxor    %%mm7, %%mm7                   \n\t"
+            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t" /* B += value at BLUE_DITHER(%5), unsigned saturating byte add */
+            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t" /* G += value at GREEN_DITHER(%5) */
+            "paddusb "RED_DITHER"(%5), %%mm5      \n\t" /* R += value at RED_DITHER(%5) */
 #endif
-
-                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        }
+            WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    }
 }
 
-static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+#define REAL_YSCALEYUV2PACKED1(index, c) /* chroma from %2 only, luma from %0, all shifted right by 7; c is not referenced here */ \
+    "xor            "#index", "#index"  \n\t" /* index = 0 */\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t" /* local label 1; no branch to it appears in this macro */\
+    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0 at byte offset eax + VOF */\
+    "psraw                $7, %%mm3     \n\t" \
+    "psraw                $7, %%mm4     \n\t" \
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[eax  ] (4 words) */\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[eax+4] (4 words) */\
+    "psraw                $7, %%mm1     \n\t" \
+    "psraw                $7, %%mm7     \n\t" \
+
+#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c) /* extra expansion level so arguments are macro-expanded before being stringified */
+
+#define REAL_YSCALEYUV2PACKED1b(index, c) /* chroma is (uvbuf0 + uvbuf1) >> 8, luma is buf0 >> 7; c is not referenced here */ \
+    "xor "#index", "#index"             \n\t" /* index = 0 */\
+    ".p2align              4            \n\t"\
+    "1:                                 \n\t" /* local label 1; no branch to it appears in this macro */\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0 at byte offset eax + VOF */\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1 at byte offset eax + VOF */\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* same sum for the words at byte offset eax + VOF */\
+    "psrlw                $8, %%mm3     \n\t" \
+    "psrlw                $8, %%mm4     \n\t" \
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[eax  ] (4 words) */\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[eax+4] (4 words) */\
+    "psraw                $7, %%mm1     \n\t" \
+    "psraw                $7, %%mm7     \n\t"
+#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c) /* extra expansion level so arguments are macro-expanded before being stringified */
+
+static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
+                                         const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                         const uint16_t *abuf0, uint8_t *dest,
+                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                         int flags, int y)
 {
-        const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1; passed as asm operand %1 ("d")
 
-        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2PACKED1(%%REGBP, %5)
-                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        } else {
-                __asm__ volatile(
-                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                    "mov        %4, %%"REG_b"               \n\t"
-                    "push %%"REG_BP"                        \n\t"
-                    YSCALEYUV2PACKED1b(%%REGBP, %5)
-                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                    "pop %%"REG_BP"                         \n\t"
-                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                    "a" (&c->redDither)
-                );
-        }
+    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster; YSCALEYUV2PACKED1 reads chroma from uvbuf0 only
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* spill REG_b to ESP_OFFSET(%5); reloaded by the last instruction */
+            "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+            "push %%"REG_BP"                        \n\t" /* REG_BP is used as the index register below */
+            YSCALEYUV2PACKED1(%%REGBP, %5)
+            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    } else { // uvalpha >= 2048: YSCALEYUV2PACKED1b sums the chroma of uvbuf0 and uvbuf1
+        __asm__ volatile(
+            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+            "mov        %4, %%"REG_b"               \n\t"
+            "push %%"REG_BP"                        \n\t"
+            YSCALEYUV2PACKED1b(%%REGBP, %5)
+            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+            "pop %%"REG_BP"                         \n\t"
+            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+               "a" (&c->redDither)
+        );
+    }
 }
 
 #if !COMPILE_TEMPLATE_MMX2

From fc72ec727e8731d57ede82502081366921667486 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 17:50:03 -0400
Subject: [PATCH 403/830] swscale: reindent h[cy]scale_fast() and
 updateDitherTables().

---
 libswscale/x86/swscale_template.c | 261 +++++++++++++++---------------
 1 file changed, 131 insertions(+), 130 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index a6abb9e805..4181cce9b5 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2169,64 +2169,64 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
     DECLARE_ALIGNED(8, uint64_t, ebxsave);
 #endif
 
-        __asm__ volatile(
+    __asm__ volatile(
 #if defined(PIC)
-            "mov               %%"REG_b", %5        \n\t"
+        "mov               %%"REG_b", %5        \n\t"
 #endif
-            "pxor                  %%mm7, %%mm7     \n\t"
-            "mov                      %0, %%"REG_c" \n\t"
-            "mov                      %1, %%"REG_D" \n\t"
-            "mov                      %2, %%"REG_d" \n\t"
-            "mov                      %3, %%"REG_b" \n\t"
-            "xor               %%"REG_a", %%"REG_a" \n\t" // i
-            PREFETCH"        (%%"REG_c")            \n\t"
-            PREFETCH"      32(%%"REG_c")            \n\t"
-            PREFETCH"      64(%%"REG_c")            \n\t"
+        "pxor                  %%mm7, %%mm7     \n\t"
+        "mov                      %0, %%"REG_c" \n\t"
+        "mov                      %1, %%"REG_D" \n\t"
+        "mov                      %2, %%"REG_d" \n\t"
+        "mov                      %3, %%"REG_b" \n\t"
+        "xor               %%"REG_a", %%"REG_a" \n\t" // i
+        PREFETCH"        (%%"REG_c")            \n\t"
+        PREFETCH"      32(%%"REG_c")            \n\t"
+        PREFETCH"      64(%%"REG_c")            \n\t"
 
 #if ARCH_X86_64
-
 #define CALL_MMX2_FILTER_CODE \
-            "movl            (%%"REG_b"), %%esi     \n\t"\
-            "call                    *%4            \n\t"\
-            "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\
-            "add               %%"REG_S", %%"REG_c" \n\t"\
-            "add               %%"REG_a", %%"REG_D" \n\t"\
-            "xor               %%"REG_a", %%"REG_a" \n\t"\
+        "movl            (%%"REG_b"), %%esi     \n\t"\
+        "call                    *%4            \n\t"\
+        "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\
+        "add               %%"REG_S", %%"REG_c" \n\t"\
+        "add               %%"REG_a", %%"REG_D" \n\t"\
+        "xor               %%"REG_a", %%"REG_a" \n\t"\
 
 #else
-
 #define CALL_MMX2_FILTER_CODE \
-            "movl (%%"REG_b"), %%esi        \n\t"\
-            "call         *%4                       \n\t"\
-            "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
-            "add               %%"REG_a", %%"REG_D" \n\t"\
-            "xor               %%"REG_a", %%"REG_a" \n\t"\
+        "movl (%%"REG_b"), %%esi        \n\t"\
+        "call         *%4                       \n\t"\
+        "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
+        "add               %%"REG_a", %%"REG_D" \n\t"\
+        "xor               %%"REG_a", %%"REG_a" \n\t"\
 
 #endif /* ARCH_X86_64 */
 
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
 
 #if defined(PIC)
-            "mov                      %5, %%"REG_b" \n\t"
+        "mov                      %5, %%"REG_b" \n\t"
 #endif
-            :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
-            "m" (mmx2FilterCode)
+        :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
+           "m" (mmx2FilterCode)
 #if defined(PIC)
-            ,"m" (ebxsave)
+          ,"m" (ebxsave)
 #endif
-            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+        : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
 #if !defined(PIC)
-            ,"%"REG_b
+         ,"%"REG_b
 #endif
-        );
-        for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
+    );
+
+    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+        dst[i] = src[srcW-1]*128;
 }
 
 static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
@@ -2242,54 +2242,55 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
     DECLARE_ALIGNED(8, uint64_t, ebxsave);
 #endif
 
-        __asm__ volatile(
+    __asm__ volatile(
 #if defined(PIC)
-            "mov          %%"REG_b", %6         \n\t"
+        "mov          %%"REG_b", %6         \n\t"
 #endif
-            "pxor             %%mm7, %%mm7      \n\t"
-            "mov                 %0, %%"REG_c"  \n\t"
-            "mov                 %1, %%"REG_D"  \n\t"
-            "mov                 %2, %%"REG_d"  \n\t"
-            "mov                 %3, %%"REG_b"  \n\t"
-            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-            PREFETCH"   (%%"REG_c")             \n\t"
-            PREFETCH" 32(%%"REG_c")             \n\t"
-            PREFETCH" 64(%%"REG_c")             \n\t"
+        "pxor             %%mm7, %%mm7      \n\t"
+        "mov                 %0, %%"REG_c"  \n\t"
+        "mov                 %1, %%"REG_D"  \n\t"
+        "mov                 %2, %%"REG_d"  \n\t"
+        "mov                 %3, %%"REG_b"  \n\t"
+        "xor          %%"REG_a", %%"REG_a"  \n\t" // i
+        PREFETCH"   (%%"REG_c")             \n\t"
+        PREFETCH" 32(%%"REG_c")             \n\t"
+        PREFETCH" 64(%%"REG_c")             \n\t"
 
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-            "mov                 %5, %%"REG_c"  \n\t" // src
-            "mov                 %1, %%"REG_D"  \n\t" // buf1
-            "add              $"AV_STRINGIFY(VOF)", %%"REG_D"  \n\t"
-            PREFETCH"   (%%"REG_c")             \n\t"
-            PREFETCH" 32(%%"REG_c")             \n\t"
-            PREFETCH" 64(%%"REG_c")             \n\t"
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        "xor          %%"REG_a", %%"REG_a"  \n\t" // i
+        "mov                 %5, %%"REG_c"  \n\t" // src
+        "mov                 %1, %%"REG_D"  \n\t" // buf1
+        "add              $"AV_STRINGIFY(VOF)", %%"REG_D"  \n\t"
+        PREFETCH"   (%%"REG_c")             \n\t"
+        PREFETCH" 32(%%"REG_c")             \n\t"
+        PREFETCH" 64(%%"REG_c")             \n\t"
 
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
-            CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
+        CALL_MMX2_FILTER_CODE
 
 #if defined(PIC)
-            "mov %6, %%"REG_b"    \n\t"
+        "mov %6, %%"REG_b"    \n\t"
 #endif
-            :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
-            "m" (mmx2FilterCode), "m" (src2)
+        :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
+           "m" (mmx2FilterCode), "m" (src2)
 #if defined(PIC)
-            ,"m" (ebxsave)
+          ,"m" (ebxsave)
 #endif
-            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+        : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
 #if !defined(PIC)
-            ,"%"REG_b
+         ,"%"REG_b
 #endif
-        );
-        for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
-            dst[i] = src1[srcW-1]*128;
-            dst[i+VOFW] = src2[srcW-1]*128;
-        }
+    );
+
+    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+        dst[i] = src1[srcW-1]*128;
+        dst[i+VOFW] = src2[srcW-1]*128;
+    }
 }
 #endif /* COMPILE_TEMPLATE_MMX2 */
 
@@ -2317,62 +2318,62 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
     const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
     const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
 
-        c->blueDither= ff_dither8[dstY&1];
-        if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
-            c->greenDither= ff_dither8[dstY&1];
-        else
-            c->greenDither= ff_dither4[dstY&1];
-        c->redDither= ff_dither8[(dstY+1)&1];
-        if (dstY < dstH - 2) {
-            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            int i;
-            if (flags & SWS_ACCURATE_RND) {
-                int s= APCK_SIZE / 8;
-                for (i=0; i<vLumFilterSize; i+=2) {
-                    *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
-                    *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
-                              lumMmxFilter[s*i+APCK_COEF/4  ]=
-                              lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
-                        + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
-                    if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                        *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
-                        *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
-                                  alpMmxFilter[s*i+APCK_COEF/4  ]=
-                                  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
-                    }
-                }
-                for (i=0; i<vChrFilterSize; i+=2) {
-                    *(const void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
-                    *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
-                              chrMmxFilter[s*i+APCK_COEF/4  ]=
-                              chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
-                        + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
-                }
-            } else {
-                for (i=0; i<vLumFilterSize; i++) {
-                    lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
-                    lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
-                    lumMmxFilter[4*i+2]=
-                    lumMmxFilter[4*i+3]=
-                        ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
-                    if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                        alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
-                        alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
-                        alpMmxFilter[4*i+2]=
-                        alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
-                    }
-                }
-                for (i=0; i<vChrFilterSize; i++) {
-                    chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
-                    chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
-                    chrMmxFilter[4*i+2]=
-                    chrMmxFilter[4*i+3]=
-                        ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
+    c->blueDither= ff_dither8[dstY&1];
+    if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
+        c->greenDither= ff_dither8[dstY&1];
+    else
+        c->greenDither= ff_dither4[dstY&1];
+    c->redDither= ff_dither8[(dstY+1)&1];
+    if (dstY < dstH - 2) {
+        const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+        const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+        const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+        int i;
+        if (flags & SWS_ACCURATE_RND) {
+            int s= APCK_SIZE / 8;
+            for (i=0; i<vLumFilterSize; i+=2) {
+                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
+                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
+                lumMmxFilter[s*i+APCK_COEF/4  ]=
+                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
+                           + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
+                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
+                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
+                    alpMmxFilter[s*i+APCK_COEF/4  ]=
+                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
                 }
             }
+            for (i=0; i<vChrFilterSize; i+=2) {
+                *(const void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
+                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
+                chrMmxFilter[s*i+APCK_COEF/4  ]=
+                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
+                           + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
+            }
+        } else {
+            for (i=0; i<vLumFilterSize; i++) {
+                lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
+                lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
+                lumMmxFilter[4*i+2]=
+                lumMmxFilter[4*i+3]=
+                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
+                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+                    alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
+                    alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
+                    alpMmxFilter[4*i+2]=
+                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
+                }
+            }
+            for (i=0; i<vChrFilterSize; i++) {
+                chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
+                chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
+                chrMmxFilter[4*i+2]=
+                chrMmxFilter[4*i+3]=
+                    ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
+            }
         }
+    }
 }
 #endif /* !COMPILE_TEMPLATE_MMX2 */
 

From 78046dadc3145a7afd16034ab1178033a053a03e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 May 2011 18:28:40 -0400
Subject: [PATCH 404/830] rgb2rgb: remove duplicate mmx/mmx2/3dnow/sse2
 functions.

Many functions have such a prefix, but do not actually use any
instructions or features from that set, thus giving the false
impression that swscale is highly optimized for a particular
system, whereas in reality it is not.
---
 libswscale/x86/rgb2rgb.c          |  2 +-
 libswscale/x86/rgb2rgb_template.c | 74 +++++++++++++++++--------------
 2 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index 81b29f32ff..78b804e367 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -111,7 +111,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
 #undef COMPILE_TEMPLATE_SSE2
 #undef COMPILE_TEMPLATE_AMD3DNOW
 #define COMPILE_TEMPLATE_MMX2 0
-#define COMPILE_TEMPLATE_SSE2 1
+#define COMPILE_TEMPLATE_SSE2 0
 #define COMPILE_TEMPLATE_AMD3DNOW 1
 #define RENAME(a) a ## _3DNOW
 #include "rgb2rgb_template.c"
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 8e6ce8586a..c4245afb34 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -30,15 +30,8 @@
 #undef MOVNTQ
 #undef EMMS
 #undef SFENCE
-#undef MMREG_SIZE
 #undef PAVGB
 
-#if COMPILE_TEMPLATE_SSE2
-#define MMREG_SIZE 16
-#else
-#define MMREG_SIZE 8
-#endif
-
 #if COMPILE_TEMPLATE_AMD3DNOW
 #define PREFETCH  "prefetch"
 #define PAVGB     "pavgusb"
@@ -64,6 +57,10 @@
 #define SFENCE " # nop"
 #endif
 
+#if !COMPILE_TEMPLATE_SSE2
+
+#if !COMPILE_TEMPLATE_AMD3DNOW
+
 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
 {
     uint8_t *dest = dst;
@@ -1513,7 +1510,9 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
                      SFENCE"     \n\t"
                      :::"memory");
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
 {
     long x,y;
@@ -1530,7 +1529,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
     dst+= dstStride;
 
     for (y=1; y<srcHeight; y++) {
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
         const x86_reg mmxSize= srcWidth&~15;
         __asm__ volatile(
             "mov           %4, %%"REG_a"            \n\t"
@@ -1564,17 +1562,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
             "punpckhbw              %%mm3, %%mm7    \n\t"
             "punpcklbw              %%mm2, %%mm4    \n\t"
             "punpckhbw              %%mm2, %%mm6    \n\t"
-#if 1
             MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
             MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
             MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
             MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-#else
-            "movq                   %%mm5,  (%2, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm4,  (%3, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-#endif
             "add                       $8, %%"REG_a"            \n\t"
             "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
             "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
@@ -1584,12 +1575,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
                "g" (-mmxSize)
             : "%"REG_a
         );
-#else
-        const x86_reg mmxSize=1;
-
-        dst[0        ]= (3*src[0] +   src[srcStride])>>2;
-        dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
-#endif
 
         for (x=mmxSize-1; x<srcWidth-1; x++) {
             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
@@ -1605,7 +1590,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
     }
 
     // last line
-#if 1
     dst[0]= src[0];
 
     for (x=0; x<srcWidth-1; x++) {
@@ -1613,18 +1597,14 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
     }
     dst[2*srcWidth-1]= src[srcWidth-1];
-#else
-    for (x=0; x<srcWidth; x++) {
-        dst[2*x+0]=
-        dst[2*x+1]= src[x];
-    }
-#endif
 
     __asm__ volatile(EMMS"       \n\t"
                      SFENCE"     \n\t"
                      :::"memory");
 }
+#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
@@ -1728,6 +1708,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
                      SFENCE"     \n\t"
                      :::"memory");
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 2.
@@ -1978,7 +1959,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
 
      rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
 }
+#endif /* !COMPILE_TEMPLATE_SSE2 */
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
                                     long width, long height, long src1Stride,
                                     long src2Stride, long dstStride)
@@ -2048,7 +2031,10 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
             ::: "memory"
             );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
+#if !COMPILE_TEMPLATE_SSE2
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                        uint8_t *dst1, uint8_t *dst2,
                                        long width, long height,
@@ -2228,6 +2214,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
             ::: "memory"
         );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
 {
@@ -2266,6 +2253,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count
     }
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
     dst0+=   count;
@@ -2311,6 +2299,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds
         count++;
     }
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
@@ -2365,6 +2354,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u
     }
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
     dst0+=   count;
@@ -2411,6 +2401,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst
         count++;
     }
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
@@ -2492,6 +2483,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
         );
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  long width, long height,
                                  long lumStride, long chromStride, long srcStride)
@@ -2514,6 +2506,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
             ::: "memory"
         );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  long width, long height,
@@ -2540,6 +2533,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
         );
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  long width, long height,
                                  long lumStride, long chromStride, long srcStride)
@@ -2562,9 +2556,13 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
             ::: "memory"
         );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* !COMPILE_TEMPLATE_SSE2 */
 
 static inline void RENAME(rgb2rgb_init)(void)
 {
+#if !COMPILE_TEMPLATE_SSE2
+#if !COMPILE_TEMPLATE_AMD3DNOW
     rgb15to16          = RENAME(rgb15to16);
     rgb15tobgr24       = RENAME(rgb15tobgr24);
     rgb15to32          = RENAME(rgb15to32);
@@ -2588,14 +2586,22 @@ static inline void RENAME(rgb2rgb_init)(void)
     yuv422ptoyuy2      = RENAME(yuv422ptoyuy2);
     yuv422ptouyvy      = RENAME(yuv422ptouyvy);
     yuy2toyv12         = RENAME(yuy2toyv12);
-    planar2x           = RENAME(planar2x);
-    rgb24toyv12        = RENAME(rgb24toyv12);
-    interleaveBytes    = RENAME(interleaveBytes);
     vu9_to_vu12        = RENAME(vu9_to_vu12);
     yvu9_to_yuy2       = RENAME(yvu9_to_yuy2);
-
-    uyvytoyuv420       = RENAME(uyvytoyuv420);
     uyvytoyuv422       = RENAME(uyvytoyuv422);
-    yuyvtoyuv420       = RENAME(yuyvtoyuv420);
     yuyvtoyuv422       = RENAME(yuyvtoyuv422);
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+    planar2x           = RENAME(planar2x);
+#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
+    rgb24toyv12        = RENAME(rgb24toyv12);
+
+    yuyvtoyuv420       = RENAME(yuyvtoyuv420);
+    uyvytoyuv420       = RENAME(uyvytoyuv420);
+#endif /* !COMPILE_TEMPLATE_SSE2 */
+
+#if !COMPILE_TEMPLATE_AMD3DNOW
+    interleaveBytes    = RENAME(interleaveBytes);
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 }

From 945dda41a2085b3c8fb5cf99026c224aa3d2cbfa Mon Sep 17 00:00:00 2001
From: Alok Ahuja <waveletcoeff@gmail.com>
Date: Sat, 28 May 2011 08:12:22 +0200
Subject: [PATCH 405/830] webm: support stereo videos in matroska/webm muxer

Create a stereo_mode metadata tag to specify the stereo 3d video layout
using the StereoMode tag in a matroska/webm video track.
---
 doc/muxers.texi           | 63 +++++++++++++++++++++++++++++++++++++++
 libavformat/matroska.h    | 20 ++++++++++++-
 libavformat/matroskaenc.c | 19 ++++++++++++
 3 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/doc/muxers.texi b/doc/muxers.texi
index 82f17ba105..74c014bc70 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -204,4 +204,67 @@ Alternatively you can write the command as:
 ffmpeg -benchmark -i INPUT -f null -
 @end example
 
+@section matroska
+
+Matroska container muxer.
+
+This muxer implements the matroska and webm container specs.
+
+The recognized metadata settings in this muxer are:
+
+@table @option
+
+@item title=@var{title name}
+Name provided to a single track
+@end table
+
+@table @option
+
+@item language=@var{language name}
+Specifies the language of the track in the Matroska languages form
+@end table
+
+@table @option
+
+@item STEREO_MODE=@var{mode}
+Stereo 3D video layout of two views in a single video track
+@table @option
+@item mono
+video is not stereo
+@item left_right
+Both views are arranged side by side, Left-eye view is on the left
+@item bottom_top
+Both views are arranged in top-bottom orientation, Left-eye view is at bottom
+@item top_bottom
+Both views are arranged in top-bottom orientation, Left-eye view is on top
+@item checkerboard_rl
+Each view is arranged in a checkerboard interleaved pattern, Right-eye view being first
+@item checkerboard_lr
+Each view is arranged in a checkerboard interleaved pattern, Left-eye view being first
+@item row_interleaved_rl
+Each view is constituted by a row based interleaving, Right-eye view is first row
+@item row_interleaved_lr
+Each view is constituted by a row based interleaving, Left-eye view is first row
+@item col_interleaved_rl
+Both views are arranged in a column based interleaving manner, Right-eye view is first column
+@item col_interleaved_lr
+Both views are arranged in a column based interleaving manner, Left-eye view is first column
+@item anaglyph_cyan_red
+All frames are in anaglyph format viewable through red-cyan filters
+@item right_left
+Both views are arranged side by side, Right-eye view is on the left
+@item anaglyph_green_magenta
+All frames are in anaglyph format viewable through green-magenta filters
+@item block_lr
+Both eyes laced in one Block, Left-eye view is first
+@item block_rl
+Both eyes laced in one Block, Right-eye view is first
+@end table
+@end table
+
+For example a 3D WebM clip can be created using the following command line:
+@example
+ffmpeg -i sample_left_right_clip.mpg -an -vcodec libvpx -metadata STEREO_MODE=left_right -y stereo_clip.webm
+@end example
+
 @c man end MUXERS
diff --git a/libavformat/matroska.h b/libavformat/matroska.h
index 45d9747d39..8e747e6a9c 100644
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h
@@ -111,7 +111,7 @@
 #define MATROSKA_ID_VIDEOPIXELCROPR 0x54DD
 #define MATROSKA_ID_VIDEODISPLAYUNIT 0x54B2
 #define MATROSKA_ID_VIDEOFLAGINTERLACED 0x9A
-#define MATROSKA_ID_VIDEOSTEREOMODE 0x53B9
+#define MATROSKA_ID_VIDEOSTEREOMODE 0x53B8
 #define MATROSKA_ID_VIDEOASPECTRATIO 0x54B3
 #define MATROSKA_ID_VIDEOCOLORSPACE 0x2EB524
 
@@ -218,6 +218,24 @@ typedef enum {
   MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP = 3,
 } MatroskaTrackEncodingCompAlgo;
 
+typedef enum {
+  MATROSKA_VIDEO_STEREOMODE_TYPE_MONO               = 0,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_LEFT_RIGHT         = 1,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTTOM_TOP         = 2,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_TOP_BOTTOM         = 3,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_RL    = 4,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_LR    = 5,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_RL = 6,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_LR = 7,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_RL = 8,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_LR = 9,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_CYAN_RED  = 10,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_RIGHT_LEFT         = 11,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_GREEN_MAG = 12,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_LR = 13,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_RL = 14,
+} MatroskaVideoStereoModeType;
+
 /*
  * Matroska Codec IDs, strings
  */
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 1bbabc9ff4..ba2ce28e17 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -586,6 +586,25 @@ static int mkv_write_tracks(AVFormatContext *s)
                 // XXX: interlace flag?
                 put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELWIDTH , codec->width);
                 put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELHEIGHT, codec->height);
+                if ((tag = av_metadata_get(s->metadata, "stereo_mode", NULL, 0))) {
+                    uint8_t stereo_fmt = atoi(tag->value);
+                    int valid_fmt = 0;
+
+                    switch (mkv->mode) {
+                    case MODE_WEBM:
+                        if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_TYPE_TOP_BOTTOM
+                            || stereo_fmt == MATROSKA_VIDEO_STEREOMODE_TYPE_RIGHT_LEFT)
+                            valid_fmt = 1;
+                        break;
+                    case MODE_MATROSKAv2:
+                        if (stereo_fmt <= MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_RL)
+                            valid_fmt = 1;
+                        break;
+                    }
+
+                    if (valid_fmt)
+                        put_ebml_uint (pb, MATROSKA_ID_VIDEOSTEREOMODE, stereo_fmt);
+                }
                 if (st->sample_aspect_ratio.num) {
                     int d_width = codec->width*av_q2d(st->sample_aspect_ratio);
                     put_ebml_uint(pb, MATROSKA_ID_VIDEODISPLAYWIDTH , d_width);

From 88aa21593932fbf5597996041ab669848c542ee5 Mon Sep 17 00:00:00 2001
From: James Zern <git@videolan.org>
Date: Tue, 12 Apr 2011 04:40:47 +0200
Subject: [PATCH 406/830] vpxenc: add VP8E_SET_STATIC_THRESHOLD mapping

via the equivalent AVCodecContext::mb_threshold

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/libvpxenc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index 79009b1ad5..02f8135381 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -311,6 +311,7 @@ static av_cold int vp8_init(AVCodecContext *avctx)
     codecctl_int(avctx, VP8E_SET_CPUUSED,           cpuused);
     codecctl_int(avctx, VP8E_SET_NOISE_SENSITIVITY, avctx->noise_reduction);
     codecctl_int(avctx, VP8E_SET_TOKEN_PARTITIONS,  av_log2(avctx->slices));
+    codecctl_int(avctx, VP8E_SET_STATIC_THRESHOLD,  avctx->mb_threshold);
 
     //provide dummy value to initialize wrapper, values will be updated each _encode()
     vpx_img_wrap(&ctx->rawimg, VPX_IMG_FMT_I420, avctx->width, avctx->height, 1,

From 009f829dde811af654af7110326aea3a72c05d5e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 27 May 2011 19:01:47 -0400
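Subject: [PATCH 407/830] swscale: fix crash in bilinear scaling.

Read the byte offset between the U and V planes from the context
(new uv_offx2 field, addressed as UV_OFFx2) inside the x86 inline
asm, instead of passing it in as an extra "m" operand.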
---
 libswscale/swscale_internal.h     |   2 +
 libswscale/utils.c                |   1 +
 libswscale/x86/swscale_template.c | 141 +++++++++++++-----------------
 3 files changed, 66 insertions(+), 78 deletions(-)

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 8ba0fe9090..d41828a4ca 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -194,6 +194,7 @@ typedef struct SwsContext {
 #define Y_TEMP                "11*8+4*4*256*2+40"
 #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
 #define UV_OFF                "11*8+4*4*256*3+48"
+#define UV_OFFx2              "11*8+4*4*256*3+56"
 
     DECLARE_ALIGNED(8, uint64_t, redDither);
     DECLARE_ALIGNED(8, uint64_t, greenDither);
@@ -217,6 +218,7 @@ typedef struct SwsContext {
     DECLARE_ALIGNED(8, uint64_t, y_temp);
     int32_t  alpMmxFilter[4*MAX_FILTER_SIZE];
     DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+    DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
 
 #if HAVE_ALTIVEC
     vector signed short   CY;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 571f45be3d..a2a6a1e191 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1001,6 +1001,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
     }
     c->uv_off = dst_stride_px;
+    c->uv_offx2 = dst_stride;
     for (i=0; i<c->vChrBufSize; i++) {
         FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+1, fail);
         c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize];
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 5cc3a435be..ba0a1978cc 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -897,16 +897,16 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
     YSCALEYUV2PACKEDX_END
 }
 
-#define REAL_YSCALEYUV2RGB_UV(index, c, uv_off) \
+#define REAL_YSCALEYUV2RGB_UV(index, c) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
     "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@@ -969,8 +969,8 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
 
 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 
-#define YSCALEYUV2RGB(index, c, uv_off) \
-    REAL_YSCALEYUV2RGB_UV(index, c, uv_off) \
+#define YSCALEYUV2RGB(index, c) \
+    REAL_YSCALEYUV2RGB_UV(index, c) \
     REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
     REAL_YSCALEYUV2RGB_COEFF(c)
 
@@ -984,12 +984,10 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
                                        const uint16_t *abuf1, uint8_t *dest,
                                        int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
         __asm__ volatile(
-            YSCALEYUV2RGB(%%r8, %5, %8)
+            YSCALEYUV2RGB(%%r8, %5)
             YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
             "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
             "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
@@ -997,7 +995,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
                "a" (&c->redDither),
-               "r" (abuf0), "r" (abuf1), "m"(uv_off)
+               "r" (abuf0), "r" (abuf1)
             : "%r8"
         );
 #else
@@ -1007,7 +1005,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5, %6)
+            YSCALEYUV2RGB(%%REGBP, %5)
             "push                   %0              \n\t"
             "push                   %1              \n\t"
             "mov          "U_TEMP"(%5), %0          \n\t"
@@ -1022,7 +1020,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
 #endif
     } else {
@@ -1030,13 +1028,13 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5, %6)
+            YSCALEYUV2RGB(%%REGBP, %5)
             "pcmpeqd %%mm7, %%mm7                   \n\t"
             WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     }
 }
@@ -1048,20 +1046,18 @@ static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
                                        const uint16_t *abuf1, uint8_t *dest,
                                        int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5, %6)
+        YSCALEYUV2RGB(%%REGBP, %5)
         "pxor    %%mm7, %%mm7                   \n\t"
         WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
     );
 }
 
@@ -1072,14 +1068,12 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
                                         const uint16_t *abuf1, uint8_t *dest,
                                         int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5, %6)
+        YSCALEYUV2RGB(%%REGBP, %5)
         "pxor    %%mm7, %%mm7                   \n\t"
         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1091,7 +1085,7 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
     );
 }
 
@@ -1102,14 +1096,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
                                         const uint16_t *abuf1, uint8_t *dest,
                                         int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5, %6)
+        YSCALEYUV2RGB(%%REGBP, %5)
         "pxor    %%mm7, %%mm7                   \n\t"
         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1121,11 +1113,11 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
     );
 }
 
-#define REAL_YSCALEYUV2PACKED(index, c, uv_off) \
+#define REAL_YSCALEYUV2PACKED(index, c) \
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
     "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
     "psraw                $3, %%mm0                           \n\t"\
@@ -1137,10 +1129,10 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
     "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@@ -1163,7 +1155,7 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
     "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
     "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
 
-#define YSCALEYUV2PACKED(index, c, uv_off)  REAL_YSCALEYUV2PACKED(index, c, uv_off)
+#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
 
 static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
                                          const uint16_t *buf1, const uint16_t *ubuf0,
@@ -1172,30 +1164,28 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
                                          const uint16_t *abuf1, uint8_t *dest,
                                          int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov %4, %%"REG_b"                        \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2PACKED(%%REGBP, %5, %6)
+        YSCALEYUV2PACKED(%%REGBP, %5)
         WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
     );
 }
 
-#define REAL_YSCALEYUV2RGB1(index, c, uv_off) \
+#define REAL_YSCALEYUV2RGB1(index, c) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
     "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
     "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
@@ -1237,19 +1227,19 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
     "packuswb          %%mm6, %%mm5     \n\t"\
     "packuswb          %%mm3, %%mm4     \n\t"\
 
-#define YSCALEYUV2RGB1(index, c, uv_off)  REAL_YSCALEYUV2RGB1(index, c, uv_off)
+#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
 
 // do vertical chrominance interpolation
-#define REAL_YSCALEYUV2RGB1b(index, c, uv_off) \
+#define REAL_YSCALEYUV2RGB1b(index, c) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
     "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
     "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
@@ -1293,7 +1283,7 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
     "packuswb          %%mm6, %%mm5     \n\t"\
     "packuswb          %%mm3, %%mm4     \n\t"\
 
-#define YSCALEYUV2RGB1b(index, c, uv_off)  REAL_YSCALEYUV2RGB1b(index, c, uv_off)
+#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
 
 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
     "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
@@ -1313,7 +1303,6 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
                                        int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1322,26 +1311,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1(%%REGBP, %5)
                 YSCALEYUV2RGB1_ALPHA(%%REGBP)
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
             );
         } else {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1(%%REGBP, %5)
                 "pcmpeqd %%mm7, %%mm7                   \n\t"
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
             );
         }
     } else {
@@ -1350,26 +1339,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1b(%%REGBP, %5)
                 YSCALEYUV2RGB1_ALPHA(%%REGBP)
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
             );
         } else {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1b(%%REGBP, %5)
                 "pcmpeqd %%mm7, %%mm7                   \n\t"
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
             );
         }
     }
@@ -1382,7 +1371,6 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
                                        int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1390,26 +1378,26 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1b(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     }
 }
@@ -1421,7 +1409,6 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
                                         int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1429,7 +1416,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1441,14 +1428,14 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1b(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1460,7 +1447,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     }
 }
@@ -1472,7 +1459,6 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
                                         int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1480,7 +1466,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1492,14 +1478,14 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1b(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1511,19 +1497,19 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     }
 }
 
-#define REAL_YSCALEYUV2PACKED1(index, c, uv_off) \
+#define REAL_YSCALEYUV2PACKED1(index, c) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psraw                $7, %%mm3     \n\t" \
     "psraw                $7, %%mm4     \n\t" \
     "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
@@ -1531,18 +1517,18 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "psraw                $7, %%mm1     \n\t" \
     "psraw                $7, %%mm7     \n\t" \
 
-#define YSCALEYUV2PACKED1(index, c, uv_off)  REAL_YSCALEYUV2PACKED1(index, c, uv_off)
+#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
 
-#define REAL_YSCALEYUV2PACKED1b(index, c, uv_off) \
+#define REAL_YSCALEYUV2PACKED1b(index, c) \
     "xor "#index", "#index"             \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
     "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
     "psrlw                $8, %%mm3     \n\t" \
@@ -1551,7 +1537,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
     "psraw                $7, %%mm1     \n\t" \
     "psraw                $7, %%mm7     \n\t"
-#define YSCALEYUV2PACKED1b(index, c, uv_off)  REAL_YSCALEYUV2PACKED1b(index, c, uv_off)
+#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
 
 static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
                                          const uint16_t *ubuf0, const uint16_t *ubuf1,
@@ -1560,7 +1546,6 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
                                          int dstW, int uvalpha, enum PixelFormat dstFormat,
                                          int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1568,24 +1553,24 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1(%%REGBP, %5, %6)
+            YSCALEYUV2PACKED1(%%REGBP, %5)
             WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1b(%%REGBP, %5, %6)
+            YSCALEYUV2PACKED1b(%%REGBP, %5)
             WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     }
 }

From 26513856d657de3b3afacae7c13fd99a7fe79d05 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 28 May 2011 01:53:00 +0200
Subject: [PATCH 408/830] cmdutils: remove OPT_FUNC2

Make ff* tools only accept opt_* functions taking two arguments.

The distinction between functions with one and two arguments is quite
pointless. Simplify parse_options() code.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 cmdutils.c             |   6 +-
 cmdutils.h             |  10 +-
 cmdutils_common_opts.h |   2 +-
 ffmpeg.c               | 215 ++++++++++++++++++++++-------------------
 ffplay.c               |  33 ++++---
 ffprobe.c              |   7 +-
 6 files changed, 144 insertions(+), 129 deletions(-)

diff --git a/cmdutils.c b/cmdutils.c
index 778f0fb3cf..31866b5563 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -277,13 +277,11 @@ unknown_opt:
                 *po->u.int64_arg = parse_number_or_die(opt, arg, OPT_INT64, INT64_MIN, INT64_MAX);
             } else if (po->flags & OPT_FLOAT) {
                 *po->u.float_arg = parse_number_or_die(opt, arg, OPT_FLOAT, -INFINITY, INFINITY);
-            } else if (po->flags & OPT_FUNC2) {
-                if (po->u.func2_arg(opt, arg) < 0) {
+            } else {
+                if (po->u.func_arg(opt, arg) < 0) {
                     fprintf(stderr, "%s: failed to set value '%s' for option '%s'\n", argv[0], arg, opt);
                     exit(1);
                 }
-            } else if (po->u.func_arg) {
-                    po->u.func_arg(arg);
             }
             if(po->flags & OPT_EXIT)
                 exit(0);
diff --git a/cmdutils.h b/cmdutils.h
index 3bb1cd616b..e231b1ff84 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -121,16 +121,14 @@ typedef struct {
 #define OPT_INT    0x0080
 #define OPT_FLOAT  0x0100
 #define OPT_SUBTITLE 0x0200
-#define OPT_FUNC2  0x0400
-#define OPT_INT64  0x0800
-#define OPT_EXIT   0x1000
-#define OPT_DATA   0x2000
+#define OPT_INT64  0x0400
+#define OPT_EXIT   0x0800
+#define OPT_DATA   0x1000
      union {
-        void (*func_arg)(const char *); //FIXME passing error code as int return would be nicer then exit() in the func
         int *int_arg;
         char **str_arg;
         float *float_arg;
-        int (*func2_arg)(const char *, const char *);
+        int (*func_arg)(const char *, const char *);
         int64_t *int64_arg;
     } u;
     const char *help;
diff --git a/cmdutils_common_opts.h b/cmdutils_common_opts.h
index da309977bd..9b5e5d22cd 100644
--- a/cmdutils_common_opts.h
+++ b/cmdutils_common_opts.h
@@ -10,4 +10,4 @@
     { "protocols", OPT_EXIT, {(void*)show_protocols}, "show available protocols" },
     { "filters",   OPT_EXIT, {(void*)show_filters  }, "show available filters" },
     { "pix_fmts" , OPT_EXIT, {(void*)show_pix_fmts }, "show available pixel formats" },
-    { "loglevel", HAS_ARG | OPT_FUNC2, {(void*)opt_loglevel}, "set libav* logging level", "loglevel" },
+    { "loglevel", HAS_ARG, {(void*)opt_loglevel}, "set libav* logging level", "loglevel" },
diff --git a/ffmpeg.c b/ffmpeg.c
index f27513da26..7721438ad1 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -2704,14 +2704,16 @@ static int transcode(AVFormatContext **output_files,
     return ret;
 }
 
-static void opt_format(const char *arg)
+static int opt_format(const char *opt, const char *arg)
 {
     last_asked_format = arg;
+    return 0;
 }
 
-static void opt_video_rc_override_string(const char *arg)
+static int opt_video_rc_override_string(const char *opt, const char *arg)
 {
     video_rc_override_string = arg;
+    return 0;
 }
 
 static int opt_me_threshold(const char *opt, const char *arg)
@@ -2753,12 +2755,13 @@ static int opt_frame_crop(const char *opt, const char *arg)
     return AVERROR(EINVAL);
 }
 
-static void opt_frame_size(const char *arg)
+static int opt_frame_size(const char *opt, const char *arg)
 {
     if (av_parse_video_size(&frame_width, &frame_height, arg) < 0) {
         fprintf(stderr, "Incorrect frame size\n");
-        ffmpeg_exit(1);
+        return AVERROR(EINVAL);
     }
+    return 0;
 }
 
 static int opt_pad(const char *opt, const char *arg) {
@@ -2766,21 +2769,22 @@ static int opt_pad(const char *opt, const char *arg) {
     return -1;
 }
 
-static void opt_frame_pix_fmt(const char *arg)
+static int opt_frame_pix_fmt(const char *opt, const char *arg)
 {
     if (strcmp(arg, "list")) {
         frame_pix_fmt = av_get_pix_fmt(arg);
         if (frame_pix_fmt == PIX_FMT_NONE) {
             fprintf(stderr, "Unknown pixel format requested: %s\n", arg);
-            ffmpeg_exit(1);
+            return AVERROR(EINVAL);
         }
     } else {
         show_pix_fmts();
         ffmpeg_exit(0);
     }
+    return 0;
 }
 
-static void opt_frame_aspect_ratio(const char *arg)
+static int opt_frame_aspect_ratio(const char *opt, const char *arg)
 {
     int x = 0, y = 0;
     double ar = 0;
@@ -2799,9 +2803,10 @@ static void opt_frame_aspect_ratio(const char *arg)
 
     if (!ar) {
         fprintf(stderr, "Incorrect aspect ratio specification.\n");
-        ffmpeg_exit(1);
+        return AVERROR(EINVAL);
     }
     frame_aspect_ratio = ar;
+    return 0;
 }
 
 static int opt_metadata(const char *opt, const char *arg)
@@ -2845,13 +2850,13 @@ static int opt_thread_count(const char *opt, const char *arg)
     return 0;
 }
 
-static void opt_audio_sample_fmt(const char *arg)
+static int opt_audio_sample_fmt(const char *opt, const char *arg)
 {
     if (strcmp(arg, "list")) {
         audio_sample_fmt = av_get_sample_fmt(arg);
         if (audio_sample_fmt == AV_SAMPLE_FMT_NONE) {
             av_log(NULL, AV_LOG_ERROR, "Invalid sample format '%s'\n", arg);
-            ffmpeg_exit(1);
+            return AVERROR(EINVAL);
         }
     } else {
         int i;
@@ -2860,6 +2865,7 @@ static void opt_audio_sample_fmt(const char *arg)
             printf("%s\n", av_get_sample_fmt_string(fmt_str, sizeof(fmt_str), i));
         ffmpeg_exit(0);
     }
+    return 0;
 }
 
 static int opt_audio_rate(const char *opt, const char *arg)
@@ -2880,12 +2886,13 @@ static int opt_video_channel(const char *opt, const char *arg)
     return 0;
 }
 
-static void opt_video_standard(const char *arg)
+static int opt_video_standard(const char *opt, const char *arg)
 {
     video_standard = av_strdup(arg);
+    return 0;
 }
 
-static void opt_codec(int *pstream_copy, char **pcodec_name,
+static int opt_codec(int *pstream_copy, char **pcodec_name,
                       int codec_type, const char *arg)
 {
     av_freep(pcodec_name);
@@ -2894,26 +2901,27 @@ static void opt_codec(int *pstream_copy, char **pcodec_name,
     } else {
         *pcodec_name = av_strdup(arg);
     }
+    return 0;
 }
 
-static void opt_audio_codec(const char *arg)
+static int opt_audio_codec(const char *opt, const char *arg)
 {
-    opt_codec(&audio_stream_copy, &audio_codec_name, AVMEDIA_TYPE_AUDIO, arg);
+    return opt_codec(&audio_stream_copy, &audio_codec_name, AVMEDIA_TYPE_AUDIO, arg);
 }
 
-static void opt_video_codec(const char *arg)
+static int opt_video_codec(const char *opt, const char *arg)
 {
-    opt_codec(&video_stream_copy, &video_codec_name, AVMEDIA_TYPE_VIDEO, arg);
+    return opt_codec(&video_stream_copy, &video_codec_name, AVMEDIA_TYPE_VIDEO, arg);
 }
 
-static void opt_subtitle_codec(const char *arg)
+static int opt_subtitle_codec(const char *opt, const char *arg)
 {
-    opt_codec(&subtitle_stream_copy, &subtitle_codec_name, AVMEDIA_TYPE_SUBTITLE, arg);
+    return opt_codec(&subtitle_stream_copy, &subtitle_codec_name, AVMEDIA_TYPE_SUBTITLE, arg);
 }
 
-static void opt_data_codec(const char *arg)
+static int opt_data_codec(const char *opt, const char *arg)
 {
-    opt_codec(&data_stream_copy, &data_codec_name, AVMEDIA_TYPE_DATA, arg);
+    return opt_codec(&data_stream_copy, &data_codec_name, AVMEDIA_TYPE_DATA, arg);
 }
 
 static int opt_codec_tag(const char *opt, const char *arg)
@@ -2934,7 +2942,7 @@ static int opt_codec_tag(const char *opt, const char *arg)
     return 0;
 }
 
-static void opt_map(const char *arg)
+static int opt_map(const char *opt, const char *arg)
 {
     AVStreamMap *m;
     char *p;
@@ -2957,6 +2965,7 @@ static void opt_map(const char *arg)
         m->sync_file_index = m->file_index;
         m->sync_stream_index = m->stream_index;
     }
+    return 0;
 }
 
 static void parse_meta_type(char *arg, char *type, int *index, char **endptr)
@@ -2980,7 +2989,7 @@ static void parse_meta_type(char *arg, char *type, int *index, char **endptr)
         *type = 'g';
 }
 
-static void opt_map_metadata(const char *arg)
+static int opt_map_metadata(const char *opt, const char *arg)
 {
     AVMetaDataMap *m, *m1;
     char *p;
@@ -3004,16 +3013,18 @@ static void opt_map_metadata(const char *arg)
         metadata_streams_autocopy = 0;
     if (m->type == 'c' || m1->type == 'c')
         metadata_chapters_autocopy = 0;
+
+    return 0;
 }
 
-static void opt_map_meta_data(const char *arg)
+static int opt_map_meta_data(const char *opt, const char *arg)
 {
     fprintf(stderr, "-map_meta_data is deprecated and will be removed soon. "
                     "Use -map_metadata instead.\n");
-    opt_map_metadata(arg);
+    return opt_map_metadata(opt, arg);
 }
 
-static void opt_map_chapters(const char *arg)
+static int opt_map_chapters(const char *opt, const char *arg)
 {
     AVChapterMap *c;
     char *p;
@@ -3026,9 +3037,10 @@ static void opt_map_chapters(const char *arg)
         p++;
 
     c->in_file = strtol(p, &p, 0);
+    return 0;
 }
 
-static void opt_input_ts_scale(const char *arg)
+static int opt_input_ts_scale(const char *opt, const char *arg)
 {
     unsigned int stream;
     double scale;
@@ -3044,6 +3056,7 @@ static void opt_input_ts_scale(const char *arg)
 
     input_files_ts_scale[nb_input_files] = grow_array(input_files_ts_scale[nb_input_files], sizeof(*input_files_ts_scale[nb_input_files]), &nb_input_files_ts_scale[nb_input_files], stream + 1);
     input_files_ts_scale[nb_input_files][stream]= scale;
+    return 0;
 }
 
 static int opt_recording_time(const char *opt, const char *arg)
@@ -3104,7 +3117,7 @@ static enum CodecID find_codec_or_die(const char *name, int type, int encoder, i
     return codec->id;
 }
 
-static void opt_input_file(const char *filename)
+static int opt_input_file(const char *opt, const char *filename)
 {
     AVFormatContext *ic;
     AVFormatParameters params, *ap = &params;
@@ -3330,6 +3343,7 @@ static void opt_input_file(const char *filename)
     av_freep(&subtitle_codec_name);
     uninit_opts();
     init_opts();
+    return 0;
 }
 
 static void check_inputs(int *has_video_ptr,
@@ -4016,7 +4030,7 @@ static void show_help(void)
     av_opt_show2(sws_opts, NULL, AV_OPT_FLAG_ENCODING_PARAM|AV_OPT_FLAG_DECODING_PARAM, 0);
 }
 
-static void opt_target(const char *arg)
+static int opt_target(const char *opt, const char *arg)
 {
     enum { PAL, NTSC, FILM, UNKNOWN } norm = UNKNOWN;
     static const char *const frame_rates[] = {"25", "30000/1001", "24000/1001"};
@@ -4073,13 +4087,12 @@ static void opt_target(const char *arg)
     }
 
     if(!strcmp(arg, "vcd")) {
+        opt_video_codec("vcodec", "mpeg1video");
+        opt_audio_codec("acodec", "mp2");
+        opt_format("f", "vcd");
 
-        opt_video_codec("mpeg1video");
-        opt_audio_codec("mp2");
-        opt_format("vcd");
-
-        opt_frame_size(norm == PAL ? "352x288" : "352x240");
-        opt_frame_rate(NULL, frame_rates[norm]);
+        opt_frame_size("s", norm == PAL ? "352x288" : "352x240");
+        opt_frame_rate("r", frame_rates[norm]);
         opt_default("g", norm == PAL ? "15" : "18");
 
         opt_default("b", "1150000");
@@ -4102,12 +4115,12 @@ static void opt_target(const char *arg)
         mux_preload= (36000+3*1200) / 90000.0; //0.44
     } else if(!strcmp(arg, "svcd")) {
 
-        opt_video_codec("mpeg2video");
-        opt_audio_codec("mp2");
-        opt_format("svcd");
+        opt_video_codec("vcodec", "mpeg2video");
+        opt_audio_codec("acodec", "mp2");
+        opt_format("f", "svcd");
 
-        opt_frame_size(norm == PAL ? "480x576" : "480x480");
-        opt_frame_rate(NULL, frame_rates[norm]);
+        opt_frame_size("s", norm == PAL ? "480x576" : "480x480");
+        opt_frame_rate("r", frame_rates[norm]);
         opt_default("g", norm == PAL ? "15" : "18");
 
         opt_default("b", "2040000");
@@ -4124,12 +4137,12 @@ static void opt_target(const char *arg)
 
     } else if(!strcmp(arg, "dvd")) {
 
-        opt_video_codec("mpeg2video");
-        opt_audio_codec("ac3");
-        opt_format("dvd");
+        opt_video_codec("vcodec", "mpeg2video");
+        opt_audio_codec("acodec", "ac3");
+        opt_format("f", "dvd");
 
-        opt_frame_size(norm == PAL ? "720x576" : "720x480");
-        opt_frame_rate(NULL, frame_rates[norm]);
+        opt_frame_size("s", norm == PAL ? "720x576" : "720x480");
+        opt_frame_rate("r", frame_rates[norm]);
         opt_default("g", norm == PAL ? "15" : "18");
 
         opt_default("b", "6000000");
@@ -4145,29 +4158,31 @@ static void opt_target(const char *arg)
 
     } else if(!strncmp(arg, "dv", 2)) {
 
-        opt_format("dv");
+        opt_format("f", "dv");
 
-        opt_frame_size(norm == PAL ? "720x576" : "720x480");
-        opt_frame_pix_fmt(!strncmp(arg, "dv50", 4) ? "yuv422p" :
-                          (norm == PAL ? "yuv420p" : "yuv411p"));
-        opt_frame_rate(NULL, frame_rates[norm]);
+        opt_frame_size("s", norm == PAL ? "720x576" : "720x480");
+        opt_frame_pix_fmt("pix_fmt", !strncmp(arg, "dv50", 4) ? "yuv422p" :
+                          norm == PAL ? "yuv420p" : "yuv411p");
+        opt_frame_rate("r", frame_rates[norm]);
 
         audio_sample_rate = 48000;
         audio_channels = 2;
 
     } else {
         fprintf(stderr, "Unknown target: %s\n", arg);
-        ffmpeg_exit(1);
+        return AVERROR(EINVAL);
     }
+    return 0;
 }
 
-static void opt_vstats_file (const char *arg)
+static int opt_vstats_file(const char *opt, const char *arg)
 {
     av_free (vstats_filename);
     vstats_filename=av_strdup (arg);
+    return 0;
 }
 
-static void opt_vstats (void)
+static int opt_vstats(const char *opt, const char *arg)
 {
     char filename[40];
     time_t today2 = time(NULL);
@@ -4175,7 +4190,7 @@ static void opt_vstats (void)
 
     snprintf(filename, sizeof(filename), "vstats_%02d%02d%02d.log", today->tm_hour, today->tm_min,
              today->tm_sec);
-    opt_vstats_file(filename);
+    return opt_vstats_file(opt, filename);
 }
 
 static int opt_bsf(const char *opt, const char *arg)
@@ -4222,13 +4237,13 @@ static int opt_preset(const char *opt, const char *arg)
             ffmpeg_exit(1);
         }
         if(!strcmp(tmp, "acodec")){
-            opt_audio_codec(tmp2);
+            opt_audio_codec(tmp, tmp2);
         }else if(!strcmp(tmp, "vcodec")){
-            opt_video_codec(tmp2);
+            opt_video_codec(tmp, tmp2);
         }else if(!strcmp(tmp, "scodec")){
-            opt_subtitle_codec(tmp2);
+            opt_subtitle_codec(tmp, tmp2);
         }else if(!strcmp(tmp, "dcodec")){
-            opt_data_codec(tmp2);
+            opt_data_codec(tmp, tmp2);
         }else if(opt_default(tmp, tmp2) < 0){
             fprintf(stderr, "%s: Invalid option or argument: '%s', parsed as '%s' = '%s'\n", filename, line, tmp, tmp2);
             ffmpeg_exit(1);
@@ -4252,17 +4267,17 @@ static const OptionDef options[] = {
     { "map_metadata", HAS_ARG | OPT_EXPERT, {(void*)opt_map_metadata}, "set metadata information of outfile from infile",
       "outfile[,metadata]:infile[,metadata]" },
     { "map_chapters",  HAS_ARG | OPT_EXPERT, {(void*)opt_map_chapters},  "set chapters mapping", "outfile:infile" },
-    { "t", OPT_FUNC2 | HAS_ARG, {(void*)opt_recording_time}, "record or transcode \"duration\" seconds of audio/video", "duration" },
+    { "t", HAS_ARG, {(void*)opt_recording_time}, "record or transcode \"duration\" seconds of audio/video", "duration" },
     { "fs", HAS_ARG | OPT_INT64, {(void*)&limit_filesize}, "set the limit file size in bytes", "limit_size" }, //
-    { "ss", OPT_FUNC2 | HAS_ARG, {(void*)opt_start_time}, "set the start time offset", "time_off" },
-    { "itsoffset", OPT_FUNC2 | HAS_ARG, {(void*)opt_input_ts_offset}, "set the input ts offset", "time_off" },
+    { "ss", HAS_ARG, {(void*)opt_start_time}, "set the start time offset", "time_off" },
+    { "itsoffset", HAS_ARG, {(void*)opt_input_ts_offset}, "set the input ts offset", "time_off" },
     { "itsscale", HAS_ARG, {(void*)opt_input_ts_scale}, "set the input ts scale", "stream:scale" },
-    { "timestamp", OPT_FUNC2 | HAS_ARG, {(void*)opt_recording_timestamp}, "set the recording timestamp ('now' to set the current time)", "time" },
-    { "metadata", OPT_FUNC2 | HAS_ARG, {(void*)opt_metadata}, "add metadata", "string=string" },
+    { "timestamp", HAS_ARG, {(void*)opt_recording_timestamp}, "set the recording timestamp ('now' to set the current time)", "time" },
+    { "metadata", HAS_ARG, {(void*)opt_metadata}, "add metadata", "string=string" },
     { "dframes", OPT_INT | HAS_ARG, {(void*)&max_frames[AVMEDIA_TYPE_DATA]}, "set the number of data frames to record", "number" },
     { "benchmark", OPT_BOOL | OPT_EXPERT, {(void*)&do_benchmark},
       "add timings for benchmarking" },
-    { "timelimit", OPT_FUNC2 | HAS_ARG, {(void*)opt_timelimit}, "set max runtime in seconds", "limit" },
+    { "timelimit", HAS_ARG, {(void*)opt_timelimit}, "set max runtime in seconds", "limit" },
     { "dump", OPT_BOOL | OPT_EXPERT, {(void*)&do_pkt_dump},
       "dump each input packet" },
     { "hex", OPT_BOOL | OPT_EXPERT, {(void*)&do_hex_dump},
@@ -4270,9 +4285,9 @@ static const OptionDef options[] = {
     { "re", OPT_BOOL | OPT_EXPERT, {(void*)&rate_emu}, "read input at native frame rate", "" },
     { "loop_input", OPT_BOOL | OPT_EXPERT, {(void*)&loop_input}, "loop (current only works with images)" },
     { "loop_output", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&loop_output}, "number of times to loop output in formats that support looping (0 loops forever)", "" },
-    { "v", HAS_ARG | OPT_FUNC2, {(void*)opt_verbose}, "set ffmpeg verbosity level", "number" },
+    { "v", HAS_ARG, {(void*)opt_verbose}, "set ffmpeg verbosity level", "number" },
     { "target", HAS_ARG, {(void*)opt_target}, "specify target file type (\"vcd\", \"svcd\", \"dvd\", \"dv\", \"dv50\", \"pal-vcd\", \"ntsc-svcd\", ...)", "type" },
-    { "threads", OPT_FUNC2 | HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" },
+    { "threads",  HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" },
     { "vsync", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&video_sync_method}, "video sync method", "" },
     { "async", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&audio_sync_method}, "audio sync method", "" },
     { "adrift_threshold", HAS_ARG | OPT_FLOAT | OPT_EXPERT, {(void*)&audio_drift_threshold}, "audio drift threshold", "threshold" },
@@ -4285,32 +4300,32 @@ static const OptionDef options[] = {
     { "copyinkf", OPT_BOOL | OPT_EXPERT, {(void*)&copy_initial_nonkeyframes}, "copy initial non-keyframes" },
 
     /* video options */
-    { "b", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
-    { "vb", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
+    { "b", HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
+    { "vb", HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
     { "vframes", OPT_INT | HAS_ARG | OPT_VIDEO, {(void*)&max_frames[AVMEDIA_TYPE_VIDEO]}, "set the number of video frames to record", "number" },
-    { "r", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" },
+    { "r", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" },
     { "s", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_size}, "set frame size (WxH or abbreviation)", "size" },
     { "aspect", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_aspect_ratio}, "set aspect ratio (4:3, 16:9 or 1.3333, 1.7777)", "aspect" },
     { "pix_fmt", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_frame_pix_fmt}, "set pixel format, 'list' as argument shows all the pixel formats supported", "format" },
-    { "croptop", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
-    { "cropbottom", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
-    { "cropleft", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
-    { "cropright", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
-    { "padtop", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
-    { "padbottom", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
-    { "padleft", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
-    { "padright", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
-    { "padcolor", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "color" },
+    { "croptop", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
+    { "cropbottom", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
+    { "cropleft", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
+    { "cropright", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
+    { "padtop", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
+    { "padbottom", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
+    { "padleft", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
+    { "padright", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
+    { "padcolor", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "color" },
     { "intra", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&intra_only}, "use only intra frames"},
     { "vn", OPT_BOOL | OPT_VIDEO, {(void*)&video_disable}, "disable video" },
     { "vdt", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&video_discard}, "discard threshold", "n" },
-    { "qscale", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qscale}, "use fixed video quantizer scale (VBR)", "q" },
+    { "qscale", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qscale}, "use fixed video quantizer scale (VBR)", "q" },
     { "rc_override", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_video_rc_override_string}, "rate control override for specific intervals", "override" },
     { "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_video_codec}, "force video codec ('copy' to copy stream)", "codec" },
-    { "me_threshold", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO, {(void*)opt_me_threshold}, "motion estimaton threshold",  "threshold" },
+    { "me_threshold", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_me_threshold}, "motion estimaton threshold",  "threshold" },
     { "sameq", OPT_BOOL | OPT_VIDEO, {(void*)&same_quality},
       "use same quantizer as source (implies VBR)" },
-    { "pass", HAS_ARG | OPT_FUNC2 | OPT_VIDEO, {(void*)opt_pass}, "select the pass number (1 or 2)", "n" },
+    { "pass", HAS_ARG | OPT_VIDEO, {(void*)opt_pass}, "select the pass number (1 or 2)", "n" },
     { "passlogfile", HAS_ARG | OPT_STRING | OPT_VIDEO, {(void*)&pass_logfilename_prefix}, "select two pass log file name prefix", "prefix" },
     { "deinterlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_deinterlace},
       "deinterlace pictures" },
@@ -4322,39 +4337,39 @@ static const OptionDef options[] = {
 #endif
     { "intra_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_intra_matrix}, "specify intra matrix coeffs", "matrix" },
     { "inter_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_inter_matrix}, "specify inter matrix coeffs", "matrix" },
-    { "top", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO, {(void*)opt_top_field_first}, "top=1/bottom=0/auto=-1 field first", "" },
+    { "top", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_top_field_first}, "top=1/bottom=0/auto=-1 field first", "" },
     { "dc", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&intra_dc_precision}, "intra_dc_precision", "precision" },
-    { "vtag", OPT_FUNC2 | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_codec_tag}, "force video tag/fourcc", "fourcc/tag" },
-    { "newvideo", OPT_VIDEO | OPT_FUNC2, {(void*)opt_new_stream}, "add a new video stream to the current output stream" },
+    { "vtag", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_codec_tag}, "force video tag/fourcc", "fourcc/tag" },
+    { "newvideo", OPT_VIDEO, {(void*)opt_new_stream}, "add a new video stream to the current output stream" },
     { "vlang", HAS_ARG | OPT_STRING | OPT_VIDEO, {(void *)&video_language}, "set the ISO 639 language code (3 letters) of the current video stream" , "code" },
     { "qphist", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, { (void *)&qp_hist }, "show QP histogram" },
     { "force_fps", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&force_fps}, "force the selected framerate, disable the best supported framerate selection" },
-    { "streamid", OPT_FUNC2 | HAS_ARG | OPT_EXPERT, {(void*)opt_streamid}, "set the value of an outfile streamid", "streamIndex:value" },
+    { "streamid", HAS_ARG | OPT_EXPERT, {(void*)opt_streamid}, "set the value of an outfile streamid", "streamIndex:value" },
     { "force_key_frames", OPT_STRING | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void *)&forced_key_frames}, "force key frames at specified timestamps", "timestamps" },
 
     /* audio options */
-    { "ab", OPT_FUNC2 | HAS_ARG | OPT_AUDIO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
+    { "ab", HAS_ARG | OPT_AUDIO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
     { "aframes", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&max_frames[AVMEDIA_TYPE_AUDIO]}, "set the number of audio frames to record", "number" },
     { "aq", OPT_FLOAT | HAS_ARG | OPT_AUDIO, {(void*)&audio_qscale}, "set audio quality (codec-specific)", "quality", },
-    { "ar", HAS_ARG | OPT_FUNC2 | OPT_AUDIO, {(void*)opt_audio_rate}, "set audio sampling rate (in Hz)", "rate" },
-    { "ac", HAS_ARG | OPT_FUNC2 | OPT_AUDIO, {(void*)opt_audio_channels}, "set number of audio channels", "channels" },
+    { "ar", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_rate}, "set audio sampling rate (in Hz)", "rate" },
+    { "ac", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_channels}, "set number of audio channels", "channels" },
     { "an", OPT_BOOL | OPT_AUDIO, {(void*)&audio_disable}, "disable audio" },
     { "acodec", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_codec}, "force audio codec ('copy' to copy stream)", "codec" },
-    { "atag", OPT_FUNC2 | HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_codec_tag}, "force audio tag/fourcc", "fourcc/tag" },
+    { "atag", HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_codec_tag}, "force audio tag/fourcc", "fourcc/tag" },
     { "vol", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&audio_volume}, "change audio volume (256=normal)" , "volume" }, //
-    { "newaudio", OPT_AUDIO | OPT_FUNC2, {(void*)opt_new_stream}, "add a new audio stream to the current output stream" },
+    { "newaudio", OPT_AUDIO, {(void*)opt_new_stream}, "add a new audio stream to the current output stream" },
     { "alang", HAS_ARG | OPT_STRING | OPT_AUDIO, {(void *)&audio_language}, "set the ISO 639 language code (3 letters) of the current audio stream" , "code" },
     { "sample_fmt", HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_audio_sample_fmt}, "set sample format, 'list' as argument shows all the sample formats supported", "format" },
 
     /* subtitle options */
     { "sn", OPT_BOOL | OPT_SUBTITLE, {(void*)&subtitle_disable}, "disable subtitle" },
     { "scodec", HAS_ARG | OPT_SUBTITLE, {(void*)opt_subtitle_codec}, "force subtitle codec ('copy' to copy stream)", "codec" },
-    { "newsubtitle", OPT_SUBTITLE | OPT_FUNC2, {(void*)opt_new_stream}, "add a new subtitle stream to the current output stream" },
+    { "newsubtitle", OPT_SUBTITLE, {(void*)opt_new_stream}, "add a new subtitle stream to the current output stream" },
     { "slang", HAS_ARG | OPT_STRING | OPT_SUBTITLE, {(void *)&subtitle_language}, "set the ISO 639 language code (3 letters) of the current subtitle stream" , "code" },
-    { "stag", OPT_FUNC2 | HAS_ARG | OPT_EXPERT | OPT_SUBTITLE, {(void*)opt_codec_tag}, "force subtitle tag/fourcc", "fourcc/tag" },
+    { "stag", HAS_ARG | OPT_EXPERT | OPT_SUBTITLE, {(void*)opt_codec_tag}, "force subtitle tag/fourcc", "fourcc/tag" },
 
     /* grab options */
-    { "vc", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_channel}, "set video grab channel (DV1394 only)", "channel" },
+    { "vc", HAS_ARG | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_channel}, "set video grab channel (DV1394 only)", "channel" },
     { "tvstd", HAS_ARG | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_standard}, "set television standard (NTSC, PAL (SECAM))", "standard" },
     { "isync", OPT_BOOL | OPT_EXPERT | OPT_GRAB, {(void*)&input_sync}, "sync read on input", "" },
 
@@ -4362,18 +4377,18 @@ static const OptionDef options[] = {
     { "muxdelay", OPT_FLOAT | HAS_ARG | OPT_EXPERT, {(void*)&mux_max_delay}, "set the maximum demux-decode delay", "seconds" },
     { "muxpreload", OPT_FLOAT | HAS_ARG | OPT_EXPERT, {(void*)&mux_preload}, "set the initial demux-decode delay", "seconds" },
 
-    { "absf", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
-    { "vbsf", OPT_FUNC2 | HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
-    { "sbsf", OPT_FUNC2 | HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
+    { "absf", HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
+    { "vbsf", HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
+    { "sbsf", HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
 
-    { "apre", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_preset}, "set the audio options to the indicated preset", "preset" },
-    { "vpre", OPT_FUNC2 | HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_preset}, "set the video options to the indicated preset", "preset" },
-    { "spre", OPT_FUNC2 | HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_preset}, "set the subtitle options to the indicated preset", "preset" },
-    { "fpre", OPT_FUNC2 | HAS_ARG | OPT_EXPERT, {(void*)opt_preset}, "set options from indicated preset file", "filename" },
+    { "apre", HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_preset}, "set the audio options to the indicated preset", "preset" },
+    { "vpre", HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_preset}, "set the video options to the indicated preset", "preset" },
+    { "spre", HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_preset}, "set the subtitle options to the indicated preset", "preset" },
+    { "fpre", HAS_ARG | OPT_EXPERT, {(void*)opt_preset}, "set options from indicated preset file", "filename" },
     /* data codec support */
     { "dcodec", HAS_ARG | OPT_DATA, {(void*)opt_data_codec}, "force data codec ('copy' to copy stream)", "codec" },
 
-    { "default", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
+    { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
     { NULL, },
 };
 
diff --git a/ffplay.c b/ffplay.c
index 3bcab5452b..e036bbdf94 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -2837,16 +2837,17 @@ static void event_loop(void)
     }
 }
 
-static void opt_frame_size(const char *arg)
+static int opt_frame_size(const char *opt, const char *arg)
 {
     if (av_parse_video_size(&frame_width, &frame_height, arg) < 0) {
         fprintf(stderr, "Incorrect frame size\n");
-        exit(1);
+        return AVERROR(EINVAL);
     }
     if ((frame_width % 2) != 0 || (frame_height % 2) != 0) {
         fprintf(stderr, "Frame size must be a multiple of 2\n");
-        exit(1);
+        return AVERROR(EINVAL);
     }
+    return 0;
 }
 
 static int opt_width(const char *opt, const char *arg)
@@ -2861,18 +2862,20 @@ static int opt_height(const char *opt, const char *arg)
     return 0;
 }
 
-static void opt_format(const char *arg)
+static int opt_format(const char *opt, const char *arg)
 {
     file_iformat = av_find_input_format(arg);
     if (!file_iformat) {
         fprintf(stderr, "Unknown input format: %s\n", arg);
-        exit(1);
+        return AVERROR(EINVAL);
     }
+    return 0;
 }
 
-static void opt_frame_pix_fmt(const char *arg)
+static int opt_frame_pix_fmt(const char *opt, const char *arg)
 {
     frame_pix_fmt = av_get_pix_fmt(arg);
+    return 0;
 }
 
 static int opt_sync(const char *opt, const char *arg)
@@ -2926,8 +2929,8 @@ static int opt_thread_count(const char *opt, const char *arg)
 
 static const OptionDef options[] = {
 #include "cmdutils_common_opts.h"
-    { "x", HAS_ARG | OPT_FUNC2, {(void*)opt_width}, "force displayed width", "width" },
-    { "y", HAS_ARG | OPT_FUNC2, {(void*)opt_height}, "force displayed height", "height" },
+    { "x", HAS_ARG, {(void*)opt_width}, "force displayed width", "width" },
+    { "y", HAS_ARG, {(void*)opt_height}, "force displayed height", "height" },
     { "s", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_size}, "set frame size (WxH or abbreviation)", "size" },
     { "fs", OPT_BOOL, {(void*)&is_full_screen}, "force full screen" },
     { "an", OPT_BOOL, {(void*)&audio_disable}, "disable audio" },
@@ -2935,16 +2938,16 @@ static const OptionDef options[] = {
     { "ast", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&wanted_stream[AVMEDIA_TYPE_AUDIO]}, "select desired audio stream", "stream_number" },
     { "vst", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&wanted_stream[AVMEDIA_TYPE_VIDEO]}, "select desired video stream", "stream_number" },
     { "sst", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&wanted_stream[AVMEDIA_TYPE_SUBTITLE]}, "select desired subtitle stream", "stream_number" },
-    { "ss", HAS_ARG | OPT_FUNC2, {(void*)&opt_seek}, "seek to a given position in seconds", "pos" },
-    { "t", HAS_ARG | OPT_FUNC2, {(void*)&opt_duration}, "play  \"duration\" seconds of audio/video", "duration" },
+    { "ss", HAS_ARG, {(void*)&opt_seek}, "seek to a given position in seconds", "pos" },
+    { "t", HAS_ARG, {(void*)&opt_duration}, "play  \"duration\" seconds of audio/video", "duration" },
     { "bytes", OPT_INT | HAS_ARG, {(void*)&seek_by_bytes}, "seek by bytes 0=off 1=on -1=auto", "val" },
     { "nodisp", OPT_BOOL, {(void*)&display_disable}, "disable graphical display" },
     { "f", HAS_ARG, {(void*)opt_format}, "force format", "fmt" },
     { "pix_fmt", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_frame_pix_fmt}, "set pixel format", "format" },
     { "stats", OPT_BOOL | OPT_EXPERT, {(void*)&show_status}, "show status", "" },
-    { "debug", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_debug}, "print specific debug info", "" },
+    { "debug", HAS_ARG | OPT_EXPERT, {(void*)opt_debug}, "print specific debug info", "" },
     { "bug", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&workaround_bugs}, "workaround bugs", "" },
-    { "vismv", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_vismv}, "visualize motion vectors", "" },
+    { "vismv", HAS_ARG | OPT_EXPERT, {(void*)opt_vismv}, "visualize motion vectors", "" },
     { "fast", OPT_BOOL | OPT_EXPERT, {(void*)&fast}, "non spec compliant optimizations", "" },
     { "genpts", OPT_BOOL | OPT_EXPERT, {(void*)&genpts}, "generate pts", "" },
     { "drp", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&decoder_reorder_pts}, "let decoder reorder pts 0=off 1=on -1=auto", ""},
@@ -2955,8 +2958,8 @@ static const OptionDef options[] = {
     { "idct", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&idct}, "set idct algo",  "algo" },
     { "er", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&error_recognition}, "set error detection threshold (0-4)",  "threshold" },
     { "ec", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&error_concealment}, "set error concealment options",  "bit_mask" },
-    { "sync", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" },
-    { "threads", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" },
+    { "sync", HAS_ARG | OPT_EXPERT, {(void*)opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" },
+    { "threads", HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" },
     { "autoexit", OPT_BOOL | OPT_EXPERT, {(void*)&autoexit}, "exit at the end", "" },
     { "exitonkeydown", OPT_BOOL | OPT_EXPERT, {(void*)&exit_on_keydown}, "exit on key down", "" },
     { "exitonmousedown", OPT_BOOL | OPT_EXPERT, {(void*)&exit_on_mousedown}, "exit on mouse down", "" },
@@ -2967,7 +2970,7 @@ static const OptionDef options[] = {
     { "vf", OPT_STRING | HAS_ARG, {(void*)&vfilters}, "video filters", "filter list" },
 #endif
     { "rdftspeed", OPT_INT | HAS_ARG| OPT_AUDIO | OPT_EXPERT, {(void*)&rdftspeed}, "rdft speed", "msecs" },
-    { "default", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
+    { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
     { "i", 0, {NULL}, "ffmpeg compatibility dummy option", ""},
     { NULL, },
 };
diff --git a/ffprobe.c b/ffprobe.c
index b5d19f06d6..508800e8d2 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -326,13 +326,14 @@ static void show_usage(void)
     printf("\n");
 }
 
-static void opt_format(const char *arg)
+static int opt_format(const char *opt, const char *arg)
 {
     iformat = av_find_input_format(arg);
     if (!iformat) {
         fprintf(stderr, "Unknown input format: %s\n", arg);
-        exit(1);
+        return AVERROR(EINVAL);
     }
+    return 0;
 }
 
 static void opt_input_file(const char *arg)
@@ -379,7 +380,7 @@ static const OptionDef options[] = {
     { "show_format",  OPT_BOOL, {(void*)&do_show_format} , "show format/container info" },
     { "show_packets", OPT_BOOL, {(void*)&do_show_packets}, "show packets info" },
     { "show_streams", OPT_BOOL, {(void*)&do_show_streams}, "show streams info" },
-    { "default", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
+    { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
     { NULL, },
 };
 

From 8f653e28f54d6629c22a7135dff4c32dfc95acda Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 28 May 2011 02:39:20 +0200
Subject: [PATCH 409/830] lavu: add av_get_pix_fmt_name() convenience function

Also deprecate avcodec_get_pix_fmt_name() in its favor.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 doc/APIchanges          | 4 ++++
 libavcodec/avcodec.h    | 8 ++++++++
 libavcodec/imgconvert.c | 7 +++----
 libavcodec/version.h    | 3 +++
 libavutil/avutil.h      | 2 +-
 libavutil/pixdesc.c     | 6 ++++++
 libavutil/pixdesc.h     | 9 +++++++++
 7 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 90dd0ec10e..d0d4f4f798 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-05-XX - xxxxxx - lavu 51.X.0 - pixdesc.h
+  Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
+  avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
+
 2011-05-25 - 30315a8 - lavf 53.1.0 - avformat.h
   Add fps_probe_size to AVFormatContext.
 
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index e067ee0273..ce3a4a6b94 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3306,7 +3306,15 @@ int avpicture_layout(const AVPicture* src, enum PixelFormat pix_fmt, int width,
  */
 int avpicture_get_size(enum PixelFormat pix_fmt, int width, int height);
 void avcodec_get_chroma_sub_sample(enum PixelFormat pix_fmt, int *h_shift, int *v_shift);
+
+#if FF_API_GET_PIX_FMT_NAME
+/**
+ * @deprecated Deprecated in favor of av_get_pix_fmt_name().
+ */
+attribute_deprecated
 const char *avcodec_get_pix_fmt_name(enum PixelFormat pix_fmt);
+#endif
+
 void avcodec_set_dimensions(AVCodecContext *s, int width, int height);
 
 /**
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 23e4318c71..351ed7ada7 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -417,13 +417,12 @@ void avcodec_get_chroma_sub_sample(enum PixelFormat pix_fmt, int *h_shift, int *
     *v_shift = av_pix_fmt_descriptors[pix_fmt].log2_chroma_h;
 }
 
+#if FF_API_GET_PIX_FMT_NAME
 const char *avcodec_get_pix_fmt_name(enum PixelFormat pix_fmt)
 {
-    if ((unsigned)pix_fmt >= PIX_FMT_NB)
-        return NULL;
-    else
-        return av_pix_fmt_descriptors[pix_fmt].name;
+    return av_get_pix_fmt_name(pix_fmt);
 }
+#endif
 
 int ff_is_hwaccel_pix_fmt(enum PixelFormat pix_fmt)
 {
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 894188b093..aded68e83e 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -65,5 +65,8 @@
 #ifndef FF_API_FLAC_GLOBAL_OPTS
 #define FF_API_FLAC_GLOBAL_OPTS (LIBAVCODEC_VERSION_MAJOR < 54)
 #endif
+#ifndef FF_API_GET_PIX_FMT_NAME
+#define FF_API_GET_PIX_FMT_NAME (LIBAVCODEC_VERSION_MAJOR < 54)
+#endif
 
 #endif /* AVCODEC_VERSION_H */
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 43f0815fd2..8b8ca40c94 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,7 +40,7 @@
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR  1
+#define LIBAVUTIL_VERSION_MINOR  2
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index bff45e522a..77584755d6 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -906,6 +906,12 @@ static enum PixelFormat get_pix_fmt_internal(const char *name)
     return PIX_FMT_NONE;
 }
 
+const char *av_get_pix_fmt_name(enum PixelFormat pix_fmt)
+{
+    return (unsigned)pix_fmt < PIX_FMT_NB ?
+        av_pix_fmt_descriptors[pix_fmt].name : NULL;
+}
+
 #if HAVE_BIGENDIAN
 #   define X_NE(be, le) be
 #else
diff --git a/libavutil/pixdesc.h b/libavutil/pixdesc.h
index cb75641058..979a23539b 100644
--- a/libavutil/pixdesc.h
+++ b/libavutil/pixdesc.h
@@ -23,6 +23,7 @@
 #define AVUTIL_PIXDESC_H
 
 #include <inttypes.h>
+#include "pixfmt.h"
 
 typedef struct AVComponentDescriptor{
     uint16_t plane        :2;            ///< which of the 4 planes contains the component
@@ -141,6 +142,14 @@ void av_write_image_line(const uint16_t *src, uint8_t *data[4], const int linesi
  */
 enum PixelFormat av_get_pix_fmt(const char *name);
 
+/**
+ * Return the short name for a pixel format, NULL in case pix_fmt is
+ * unknown.
+ *
+ * @see av_get_pix_fmt(), av_get_pix_fmt_string()
+ */
+const char *av_get_pix_fmt_name(enum PixelFormat pix_fmt);
+
 /**
  * Print in buf the string corresponding to the pixel format with
  * number pix_fmt, or an header if pix_fmt is negative.

From 04de5bf56c1f1f946272f436d9c8cb82c63d30b4 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sat, 28 May 2011 09:46:42 -0400
Subject: [PATCH 410/830] APIchanges: fill in git hash for av_get_pix_fmt_name
 (0420bd7).

---
 doc/APIchanges | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index d0d4f4f798..a55b15284b 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,7 +13,7 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
-2011-05-XX - xxxxxx - lavu 51.X.0 - pixdesc.h
+2011-05-28 - 0420bd7 - lavu 51.2.0 - pixdesc.h
   Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
   avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
 

From 7f7726c7a23f8ca9d8cd5d2511356d724d4284a3 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 27 May 2011 20:31:57 +0100
Subject: [PATCH 411/830] ac3enc: use LOCAL_ALIGNED macro

Aligned local variables must use the LOCAL_ALIGNED macro to ensure
correct alignment on all systems.  The unusual size argument to
memset is required since the implementation of LOCAL_ALIGNED may
declare the variable as a pointer to an array, in which case
sizeof(variable) would give the size of the pointer, not the array.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/ac3enc.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 5285874caa..c85c69d248 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -546,12 +546,15 @@ static inline float calc_cpl_coord(float energy_ch, float energy_cpl)
 static void apply_channel_coupling(AC3EncodeContext *s)
 {
 #if CONFIG_AC3ENC_FLOAT
-    DECLARE_ALIGNED(16, float,   cpl_coords)      [AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
-    DECLARE_ALIGNED(16, int32_t, fixed_cpl_coords)[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
+    LOCAL_ALIGNED_16(float,   cpl_coords,       [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
+    LOCAL_ALIGNED_16(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
     int blk, ch, bnd, i, j;
     CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
     int num_cpl_coefs = s->num_cpl_subbands * 12;
 
+    memset(cpl_coords,       0, AC3_MAX_BLOCKS * sizeof(*cpl_coords));
+    memset(fixed_cpl_coords, 0, AC3_MAX_BLOCKS * sizeof(*fixed_cpl_coords));
+
     /* calculate coupling channel from fbw channels */
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];

From a84f82560e3fa6aa41de64b0a5b75d1d607599aa Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 27 May 2011 21:43:15 +0100
Subject: [PATCH 412/830] ARM: improve FASTDIV asm

This uses one register less.  Also add missing "cc" clobber.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/arm/intmath.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h
index cc3de90f04..4340b59075 100644
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h
@@ -33,12 +33,12 @@
 #define FASTDIV FASTDIV
 static av_always_inline av_const int FASTDIV(int a, int b)
 {
-    int r, t;
-    __asm__ ("cmp     %3, #2               \n\t"
-             "ldr     %1, [%4, %3, lsl #2] \n\t"
-             "lsrle   %0, %2, #1           \n\t"
-             "smmulgt %0, %1, %2           \n\t"
-             : "=&r"(r), "=&r"(t) : "r"(a), "r"(b), "r"(ff_inverse));
+    int r;
+    __asm__ ("cmp     %2, #2               \n\t"
+             "ldr     %0, [%3, %2, lsl #2] \n\t"
+             "lsrle   %0, %1, #1           \n\t"
+             "smmulgt %0, %0, %1           \n\t"
+             : "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc");
     return r;
 }
 

From 5726ec171bf7cb833f8445d9a294f23fc047a549 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 27 May 2011 21:59:46 +0100
Subject: [PATCH 413/830] ARM: add "cc" clobbers to inline asm where needed

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/aac.h        | 2 +-
 libavcodec/arm/mathops.h    | 3 ++-
 libavcodec/arm/vp56_arith.h | 6 ++++--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/libavcodec/arm/aac.h b/libavcodec/arm/aac.h
index e907c9ea25..8355e9c193 100644
--- a/libavcodec/arm/aac.h
+++ b/libavcodec/arm/aac.h
@@ -128,7 +128,7 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                       : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
                         "+r"(sign), "=r"(nz)
                       : "r"(v), "r"(idx), "r"(scale)
-                      : "d0", "d1", "d2", "d3", "d4", "d5");
+                      : "cc", "d0", "d1", "d2", "d3", "d4", "d5");
     return dst;
 }
 
diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index 1fce8e3267..858e73d677 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -107,7 +107,8 @@ static inline av_const int mid_pred(int a, int b, int c)
         "cmp   %0, %1  \n\t"
         "movgt %0, %1  \n\t"
         : "=&r"(m), "+r"(a)
-        : "r"(b), "r"(c));
+        : "r"(b), "r"(c)
+        : "cc");
     return m;
 }
 
diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h
index 0591d614a9..8785a77860 100644
--- a/libavcodec/arm/vp56_arith.h
+++ b/libavcodec/arm/vp56_arith.h
@@ -48,7 +48,8 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
                       : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit),
                         "+&r"(c->bits), "+&r"(c->buffer)
                       : "r"(high), "r"(pr), "r"(c->end - 1),
-                        "0"(shift), "1"(code_word));
+                        "0"(shift), "1"(code_word)
+                      : "cc");
 
     return bit;
 }
@@ -74,7 +75,8 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
                       "lsl     %2,  %0,  #16          \n"
                       : "=&r"(low), "+&r"(code_word), "=&r"(tmp),
                         "+&r"(c->bits), "+&r"(c->buffer)
-                      : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift));
+                      : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift)
+                      : "cc");
 
     if (code_word >= tmp) {
         c->high      = high - low;

From 84e4804ad08a6371e9368c7db8c5d9e2c81e175d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 27 May 2011 22:49:28 +0100
Subject: [PATCH 414/830] ARM: remove unnecessary volatile from inline asm

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/mathops.h    |  2 +-
 libavcodec/arm/vp56_arith.h | 66 ++++++++++++++++++-------------------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index 858e73d677..3a7a1f3ee9 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -97,7 +97,7 @@ static inline av_const int MUL16(int ra, int rb)
 static inline av_const int mid_pred(int a, int b, int c)
 {
     int m;
-    __asm__ volatile (
+    __asm__ (
         "mov   %0, %2  \n\t"
         "cmp   %1, %2  \n\t"
         "movgt %0, %1  \n\t"
diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h
index 8785a77860..9ce3fd0d91 100644
--- a/libavcodec/arm/vp56_arith.h
+++ b/libavcodec/arm/vp56_arith.h
@@ -31,25 +31,25 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
     unsigned high      = c->high << shift;
     unsigned bit;
 
-    __asm__ volatile ("adds    %3,  %3,  %0           \n"
-                      "cmpcs   %7,  %4                \n"
-                      "ldrcsh  %2,  [%4], #2          \n"
-                      "rsb     %0,  %6,  #256         \n"
-                      "smlabb  %0,  %5,  %6,  %0      \n"
-                      "rev16cs %2,  %2                \n"
-                      "orrcs   %1,  %1,  %2,  lsl %3  \n"
-                      "subcs   %3,  %3,  #16          \n"
-                      "lsr     %0,  %0,  #8           \n"
-                      "cmp     %1,  %0,  lsl #16      \n"
-                      "subge   %1,  %1,  %0,  lsl #16 \n"
-                      "subge   %0,  %5,  %0           \n"
-                      "movge   %2,  #1                \n"
-                      "movlt   %2,  #0                \n"
-                      : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit),
-                        "+&r"(c->bits), "+&r"(c->buffer)
-                      : "r"(high), "r"(pr), "r"(c->end - 1),
-                        "0"(shift), "1"(code_word)
-                      : "cc");
+    __asm__ ("adds    %3,  %3,  %0           \n"
+             "cmpcs   %7,  %4                \n"
+             "ldrcsh  %2,  [%4], #2          \n"
+             "rsb     %0,  %6,  #256         \n"
+             "smlabb  %0,  %5,  %6,  %0      \n"
+             "rev16cs %2,  %2                \n"
+             "orrcs   %1,  %1,  %2,  lsl %3  \n"
+             "subcs   %3,  %3,  #16          \n"
+             "lsr     %0,  %0,  #8           \n"
+             "cmp     %1,  %0,  lsl #16      \n"
+             "subge   %1,  %1,  %0,  lsl #16 \n"
+             "subge   %0,  %5,  %0           \n"
+             "movge   %2,  #1                \n"
+             "movlt   %2,  #0                \n"
+             : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit),
+               "+&r"(c->bits), "+&r"(c->buffer)
+             : "r"(high), "r"(pr), "r"(c->end - 1),
+               "0"(shift), "1"(code_word)
+             : "cc");
 
     return bit;
 }
@@ -63,20 +63,20 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
     unsigned low;
     unsigned tmp;
 
-    __asm__ volatile ("adds    %3,  %3,  %0           \n"
-                      "cmpcs   %7,  %4                \n"
-                      "ldrcsh  %2,  [%4], #2          \n"
-                      "rsb     %0,  %6,  #256         \n"
-                      "smlabb  %0,  %5,  %6,  %0      \n"
-                      "rev16cs %2,  %2                \n"
-                      "orrcs   %1,  %1,  %2,  lsl %3  \n"
-                      "subcs   %3,  %3,  #16          \n"
-                      "lsr     %0,  %0,  #8           \n"
-                      "lsl     %2,  %0,  #16          \n"
-                      : "=&r"(low), "+&r"(code_word), "=&r"(tmp),
-                        "+&r"(c->bits), "+&r"(c->buffer)
-                      : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift)
-                      : "cc");
+    __asm__ ("adds    %3,  %3,  %0           \n"
+             "cmpcs   %7,  %4                \n"
+             "ldrcsh  %2,  [%4], #2          \n"
+             "rsb     %0,  %6,  #256         \n"
+             "smlabb  %0,  %5,  %6,  %0      \n"
+             "rev16cs %2,  %2                \n"
+             "orrcs   %1,  %1,  %2,  lsl %3  \n"
+             "subcs   %3,  %3,  #16          \n"
+             "lsr     %0,  %0,  #8           \n"
+             "lsl     %2,  %0,  #16          \n"
+             : "=&r"(low), "+&r"(code_word), "=&r"(tmp),
+               "+&r"(c->bits), "+&r"(c->buffer)
+             : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift)
+             : "cc");
 
     if (code_word >= tmp) {
         c->high      = high - low;

From 7d8c17b5f62bc14d5f7c7c792630b23240b47eec Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 27 May 2011 22:52:49 +0100
Subject: [PATCH 415/830] ARM: aacdec: fix constraints on inline asm

This adds output operands for the memory modified by the asm,
allowing the volatile qualifiers to be dropped.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/aac.h | 153 ++++++++++++++++++++++---------------------
 1 file changed, 78 insertions(+), 75 deletions(-)

diff --git a/libavcodec/arm/aac.h b/libavcodec/arm/aac.h
index 8355e9c193..6d5df4933a 100644
--- a/libavcodec/arm/aac.h
+++ b/libavcodec/arm/aac.h
@@ -30,17 +30,17 @@ static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                            const float *scale)
 {
     unsigned v0, v1;
-    __asm__ volatile ("ubfx     %0,  %4,  #0, #4      \n\t"
-                      "ubfx     %1,  %4,  #4, #4      \n\t"
-                      "ldr      %0,  [%3, %0, lsl #2] \n\t"
-                      "ldr      %1,  [%3, %1, lsl #2] \n\t"
-                      "vld1.32  {d1[]},   [%5,:32]    \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "vmul.f32 d0,  d0,  d1          \n\t"
-                      "vst1.32  {d0},     [%2,:64]!   \n\t"
-                      : "=&r"(v0), "=&r"(v1), "+r"(dst)
-                      : "r"(v), "r"(idx), "r"(scale)
-                      : "d0", "d1");
+    __asm__ ("ubfx     %0,  %6,  #0, #4      \n\t"
+             "ubfx     %1,  %6,  #4, #4      \n\t"
+             "ldr      %0,  [%5, %0, lsl #2] \n\t"
+             "ldr      %1,  [%5, %1, lsl #2] \n\t"
+             "vld1.32  {d1[]},   [%7,:32]    \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "vmul.f32 d0,  d0,  d1          \n\t"
+             "vst1.32  {d0},     [%2,:64]!   \n\t"
+             : "=&r"(v0), "=&r"(v1), "+r"(dst), "=m"(dst[0]), "=m"(dst[1])
+             : "r"(v), "r"(idx), "r"(scale)
+             : "d0", "d1");
     return dst;
 }
 
@@ -49,22 +49,23 @@ static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                            const float *scale)
 {
     unsigned v0, v1, v2, v3;
-    __asm__ volatile ("ubfx     %0,  %6,  #0, #2      \n\t"
-                      "ubfx     %1,  %6,  #2, #2      \n\t"
-                      "ldr      %0,  [%5, %0, lsl #2] \n\t"
-                      "ubfx     %2,  %6,  #4, #2      \n\t"
-                      "ldr      %1,  [%5, %1, lsl #2] \n\t"
-                      "ubfx     %3,  %6,  #6, #2      \n\t"
-                      "ldr      %2,  [%5, %2, lsl #2] \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "ldr      %3,  [%5, %3, lsl #2] \n\t"
-                      "vld1.32  {d2[],d3[]},[%7,:32]  \n\t"
-                      "vmov     d1,  %2,  %3          \n\t"
-                      "vmul.f32 q0,  q0,  q1          \n\t"
-                      "vst1.32  {q0},     [%4,:128]!  \n\t"
-                      : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst)
-                      : "r"(v), "r"(idx), "r"(scale)
-                      : "d0", "d1", "d2", "d3");
+    __asm__ ("ubfx     %0,  %10, #0, #2      \n\t"
+             "ubfx     %1,  %10, #2, #2      \n\t"
+             "ldr      %0,  [%9, %0, lsl #2] \n\t"
+             "ubfx     %2,  %10, #4, #2      \n\t"
+             "ldr      %1,  [%9, %1, lsl #2] \n\t"
+             "ubfx     %3,  %10, #6, #2      \n\t"
+             "ldr      %2,  [%9, %2, lsl #2] \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "ldr      %3,  [%9, %3, lsl #2] \n\t"
+             "vld1.32  {d2[],d3[]},[%11,:32] \n\t"
+             "vmov     d1,  %2,  %3          \n\t"
+             "vmul.f32 q0,  q0,  q1          \n\t"
+             "vst1.32  {q0},     [%4,:128]!  \n\t"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
+               "=m"(dst[0]), "=m"(dst[1]), "=m"(dst[2]), "=m"(dst[3])
+             : "r"(v), "r"(idx), "r"(scale)
+             : "d0", "d1", "d2", "d3");
     return dst;
 }
 
@@ -73,22 +74,23 @@ static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
                             unsigned sign, const float *scale)
 {
     unsigned v0, v1, v2, v3;
-    __asm__ volatile ("ubfx     %0,  %6,  #0, #4      \n\t"
-                      "ubfx     %1,  %6,  #4, #4      \n\t"
-                      "ldr      %0,  [%5, %0, lsl #2] \n\t"
-                      "lsl      %2,  %8,  #30         \n\t"
-                      "ldr      %1,  [%5, %1, lsl #2] \n\t"
-                      "lsl      %3,  %8,  #31         \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "bic      %2,  %2,  #1<<30      \n\t"
-                      "vld1.32  {d1[]},   [%7,:32]    \n\t"
-                      "vmov     d2,  %2,  %3          \n\t"
-                      "veor     d0,  d0,  d2          \n\t"
-                      "vmul.f32 d0,  d0,  d1          \n\t"
-                      "vst1.32  {d0},     [%4,:64]!   \n\t"
-                      : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst)
-                      : "r"(v), "r"(idx), "r"(scale), "r"(sign)
-                      : "d0", "d1", "d2");
+    __asm__ ("ubfx     %0,  %8,  #0, #4      \n\t"
+             "ubfx     %1,  %8,  #4, #4      \n\t"
+             "ldr      %0,  [%7, %0, lsl #2] \n\t"
+             "lsl      %2,  %10, #30         \n\t"
+             "ldr      %1,  [%7, %1, lsl #2] \n\t"
+             "lsl      %3,  %10, #31         \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "bic      %2,  %2,  #1<<30      \n\t"
+             "vld1.32  {d1[]},   [%9,:32]    \n\t"
+             "vmov     d2,  %2,  %3          \n\t"
+             "veor     d0,  d0,  d2          \n\t"
+             "vmul.f32 d0,  d0,  d1          \n\t"
+             "vst1.32  {d0},     [%4,:64]!   \n\t"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
+               "=m"(dst[0]), "=m"(dst[1])
+             : "r"(v), "r"(idx), "r"(scale), "r"(sign)
+             : "d0", "d1", "d2");
     return dst;
 }
 
@@ -97,38 +99,39 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                             unsigned sign, const float *scale)
 {
     unsigned v0, v1, v2, v3, nz;
-    __asm__ volatile ("vld1.32  {d2[],d3[]},[%9,:32]  \n\t"
-                      "ubfx     %0,  %8,  #0, #2      \n\t"
-                      "ubfx     %1,  %8,  #2, #2      \n\t"
-                      "ldr      %0,  [%7, %0, lsl #2] \n\t"
-                      "ubfx     %2,  %8,  #4, #2      \n\t"
-                      "ldr      %1,  [%7, %1, lsl #2] \n\t"
-                      "ubfx     %3,  %8,  #6, #2      \n\t"
-                      "ldr      %2,  [%7, %2, lsl #2] \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "ldr      %3,  [%7, %3, lsl #2] \n\t"
-                      "lsr      %6,  %8,  #12         \n\t"
-                      "rbit     %6,  %6               \n\t"
-                      "vmov     d1,  %2,  %3          \n\t"
-                      "lsls     %6,  %6,  #1          \n\t"
-                      "and      %0,  %5,  #1<<31      \n\t"
-                      "lslcs    %5,  %5,  #1          \n\t"
-                      "lsls     %6,  %6,  #1          \n\t"
-                      "and      %1,  %5,  #1<<31      \n\t"
-                      "lslcs    %5,  %5,  #1          \n\t"
-                      "lsls     %6,  %6,  #1          \n\t"
-                      "and      %2,  %5,  #1<<31      \n\t"
-                      "lslcs    %5,  %5,  #1          \n\t"
-                      "vmov     d4,  %0,  %1          \n\t"
-                      "and      %3,  %5,  #1<<31      \n\t"
-                      "vmov     d5,  %2,  %3          \n\t"
-                      "veor     q0,  q0,  q2          \n\t"
-                      "vmul.f32 q0,  q0,  q1          \n\t"
-                      "vst1.32  {q0},     [%4,:128]!  \n\t"
-                      : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
-                        "+r"(sign), "=r"(nz)
-                      : "r"(v), "r"(idx), "r"(scale)
-                      : "cc", "d0", "d1", "d2", "d3", "d4", "d5");
+    __asm__ ("vld1.32  {d2[],d3[]},[%13,:32] \n\t"
+             "ubfx     %0,  %12, #0, #2      \n\t"
+             "ubfx     %1,  %12, #2, #2      \n\t"
+             "ldr      %0,  [%11,%0, lsl #2] \n\t"
+             "ubfx     %2,  %12, #4, #2      \n\t"
+             "ldr      %1,  [%11,%1, lsl #2] \n\t"
+             "ubfx     %3,  %12, #6, #2      \n\t"
+             "ldr      %2,  [%11,%2, lsl #2] \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "ldr      %3,  [%11,%3, lsl #2] \n\t"
+             "lsr      %6,  %12, #12         \n\t"
+             "rbit     %6,  %6               \n\t"
+             "vmov     d1,  %2,  %3          \n\t"
+             "lsls     %6,  %6,  #1          \n\t"
+             "and      %0,  %5,  #1<<31      \n\t"
+             "lslcs    %5,  %5,  #1          \n\t"
+             "lsls     %6,  %6,  #1          \n\t"
+             "and      %1,  %5,  #1<<31      \n\t"
+             "lslcs    %5,  %5,  #1          \n\t"
+             "lsls     %6,  %6,  #1          \n\t"
+             "and      %2,  %5,  #1<<31      \n\t"
+             "lslcs    %5,  %5,  #1          \n\t"
+             "vmov     d4,  %0,  %1          \n\t"
+             "and      %3,  %5,  #1<<31      \n\t"
+             "vmov     d5,  %2,  %3          \n\t"
+             "veor     q0,  q0,  q2          \n\t"
+             "vmul.f32 q0,  q0,  q1          \n\t"
+             "vst1.32  {q0},     [%4,:128]!  \n\t"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
+               "+r"(sign), "=r"(nz),
+               "=m"(dst[0]), "=m"(dst[1]), "=m"(dst[2]), "=m"(dst[3])
+             : "r"(v), "r"(idx), "r"(scale)
+             : "cc", "d0", "d1", "d2", "d3", "d4", "d5");
     return dst;
 }
 

From b8e893399ff8755721dc117695ec5ff183c1e07b Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 26 May 2011 18:57:26 +0200
Subject: [PATCH 416/830] sws: replace all long with int.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libswscale/bfin/internal_bfin.S           |   8 +-
 libswscale/bfin/swscale_bfin.c            |   8 +-
 libswscale/colorspace-test.c              |   2 +-
 libswscale/ppc/swscale_altivec_template.c |   4 +-
 libswscale/ppc/swscale_template.c         |   4 +-
 libswscale/ppc/yuv2rgb_altivec.c          |   2 +-
 libswscale/rgb2rgb.c                      | 148 +++++++++----------
 libswscale/rgb2rgb.h                      | 130 ++++++++---------
 libswscale/rgb2rgb_template.c             | 170 +++++++++++-----------
 libswscale/swscale.c                      |  52 +++----
 libswscale/swscale.h                      |   4 +-
 libswscale/swscale_internal.h             |  18 +--
 libswscale/swscale_template.c             |  52 +++----
 libswscale/x86/rgb2rgb_template.c         | 146 +++++++++----------
 libswscale/x86/swscale_template.c         |  68 ++++-----
 15 files changed, 408 insertions(+), 408 deletions(-)

diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S
index dea23215a0..9f985e7824 100644
--- a/libswscale/bfin/internal_bfin.S
+++ b/libswscale/bfin/internal_bfin.S
@@ -466,8 +466,8 @@ DEFUN_END(yuv2rgb24_line)
 #define ARG_srcStride   40
 
 DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                         long width, long height,
-                         long lumStride, long chromStride, long srcStride)):
+                         int width, int height,
+                         int lumStride, int chromStride, int srcStride)):
         link 0;
         [--sp] = (r7:4,p5:4);
 
@@ -539,8 +539,8 @@ DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8
 DEFUN_END(uyvytoyv12)
 
 DEFUN(yuyvtoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                         long width, long height,
-                         long lumStride, long chromStride, long srcStride)):
+                         int width, int height,
+                         int lumStride, int chromStride, int srcStride)):
         link 0;
         [--sp] = (r7:4,p5:4);
 
diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c
index 60b7f8310b..c8030ba29f 100644
--- a/libswscale/bfin/swscale_bfin.c
+++ b/libswscale/bfin/swscale_bfin.c
@@ -38,12 +38,12 @@
 #endif
 
 int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                       long width, long height,
-                       long lumStride, long chromStride, long srcStride) L1CODE;
+                       int width, int height,
+                       int lumStride, int chromStride, int srcStride) L1CODE;
 
 int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                       long width, long height,
-                       long lumStride, long chromStride, long srcStride) L1CODE;
+                       int width, int height,
+                       int lumStride, int chromStride, int srcStride) L1CODE;
 
 static int uyvytoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dst[], int dstStride[])
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index 4ed9164323..07c1cbd803 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -52,7 +52,7 @@ int main(int argc, char **argv)
             int src_bpp;
             int dst_bpp;
             const char *name;
-            void (*func)(const uint8_t *src, uint8_t *dst, long src_size);
+            void (*func)(const uint8_t *src, uint8_t *dst, int src_size);
         } func_info[] = {
             FUNC(2, 2, rgb15to16),
             FUNC(2, 3, rgb15to24),
diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec_template.c
index eee7bdd0a5..21e3b4eafb 100644
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec_template.c
@@ -29,13 +29,13 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
     register int i;
     vector unsigned int altivec_vectorShiftInt19 =
         vec_add(vec_splat_u32(10), vec_splat_u32(9));
-    if ((unsigned long)dest % 16) {
+    if ((unsigned int)dest % 16) {
         /* badly aligned store, we force store alignment */
         /* and will handle load misalignment on val w/ vec_perm */
         vector unsigned char perm1;
         vector signed int v1;
         for (i = 0 ; (i < dstW) &&
-            (((unsigned long)dest + i) % 16) ; i++) {
+            (((unsigned int)dest + i) % 16) ; i++) {
                 int t = val[i] >> 19;
                 dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
         }
diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
index 0fe97a1114..ca6777144d 100644
--- a/libswscale/ppc/swscale_template.c
+++ b/libswscale/ppc/swscale_template.c
@@ -30,7 +30,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                     const int16_t **chrVSrc, int chrFilterSize,
                                     const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, long dstW, long chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW)
 {
     yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
                           chrFilter, chrUSrc, chrVSrc, chrFilterSize,
@@ -45,7 +45,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t *chrFilter, const int16_t **chrUSrc,
                                        const int16_t **chrVSrc, int chrFilterSize,
                                        const int16_t **alpSrc, uint8_t *dest,
-                                       long dstW, long dstY)
+                                       int dstW, int dstY)
 {
     /* The following list of supported dstFormat values should
        match what's found in the body of ff_yuv2packedX_altivec() */
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index abd49c9e96..0cc0d3084d 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -793,7 +793,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
 
     vector signed short   RND = vec_splat_s16(1<<3);
     vector unsigned short SCL = vec_splat_u16(4);
-    DECLARE_ALIGNED(16, unsigned long, scratch)[16];
+    DECLARE_ALIGNED(16, unsigned int, scratch)[16];
 
     vector signed short *YCoeffs, *CCoeffs;
 
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index e18cd51011..51cb600e6b 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -29,70 +29,70 @@
 #include "swscale.h"
 #include "swscale_internal.h"
 
-void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
 
 void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                   long width, long height,
-                   long lumStride, long chromStride, long dstStride);
+                   int width, int height,
+                   int lumStride, int chromStride, int dstStride);
 void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                   long width, long height,
-                   long lumStride, long chromStride, long dstStride);
+                   int width, int height,
+                   int lumStride, int chromStride, int dstStride);
 void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                      long width, long height,
-                      long lumStride, long chromStride, long dstStride);
+                      int width, int height,
+                      int lumStride, int chromStride, int dstStride);
 void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                      long width, long height,
-                      long lumStride, long chromStride, long dstStride);
+                      int width, int height,
+                      int lumStride, int chromStride, int dstStride);
 void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                   long width, long height,
-                   long lumStride, long chromStride, long srcStride);
+                   int width, int height,
+                   int lumStride, int chromStride, int srcStride);
 void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                    long width, long height,
-                    long lumStride, long chromStride, long srcStride);
-void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height,
-                 long srcStride, long dstStride);
+                    int width, int height,
+                    int lumStride, int chromStride, int srcStride);
+void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
+                 int srcStride, int dstStride);
 void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
-                        long width, long height, long src1Stride,
-                        long src2Stride, long dstStride);
+                        int width, int height, int src1Stride,
+                        int src2Stride, int dstStride);
 void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                     uint8_t *dst1, uint8_t *dst2,
-                    long width, long height,
-                    long srcStride1, long srcStride2,
-                    long dstStride1, long dstStride2);
+                    int width, int height,
+                    int srcStride1, int srcStride2,
+                    int dstStride1, int dstStride2);
 void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                      uint8_t *dst,
-                     long width, long height,
-                     long srcStride1, long srcStride2,
-                     long srcStride3, long dstStride);
+                     int width, int height,
+                     int srcStride1, int srcStride2,
+                     int srcStride3, int dstStride);
 void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 
 #define RGB2YUV_SHIFT 8
 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
@@ -123,10 +123,10 @@ void sws_rgb2rgb_init(void)
         rgb2rgb_init_x86();
 }
 
-void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 2;
+    int i;
+    int num_pixels = src_size >> 2;
     for (i=0; i<num_pixels; i++) {
 #if HAVE_BIGENDIAN
         /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
@@ -141,9 +141,9 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
+    int i;
     for (i=0; 3*i<src_size; i++) {
 #if HAVE_BIGENDIAN
         /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
@@ -160,7 +160,7 @@ void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -183,7 +183,7 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -198,10 +198,10 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;
 
     for (i=0; i<num_pixels; i++) {
         unsigned rgb = ((const uint16_t*)src)[i];
@@ -209,10 +209,10 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;
 
     for (i=0; i<num_pixels; i++) {
         unsigned rgb = ((const uint16_t*)src)[i];
@@ -220,7 +220,7 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -243,7 +243,7 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -258,10 +258,10 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;
 
     for (i=0; i<num_pixels; i++) {
         unsigned rgb = ((const uint16_t*)src)[i];
@@ -269,10 +269,10 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;
 
     for (i=0; i<num_pixels; i++) {
         unsigned br;
@@ -282,10 +282,10 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
+void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size;
+    int i;
+    int num_pixels = src_size;
     for (i=0; i<num_pixels; i++) {
         unsigned b,g,r;
         register uint8_t rgb;
@@ -298,9 +298,9 @@ void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
 }
 
 #define DEFINE_SHUFFLE_BYTES(a, b, c, d)                                \
-void shuffle_bytes_##a##b##c##d(const uint8_t *src, uint8_t *dst, long src_size) \
+void shuffle_bytes_##a##b##c##d(const uint8_t *src, uint8_t *dst, int src_size) \
 {                                                                       \
-    long i;                                                             \
+    int i;                                                             \
                                                                         \
     for (i = 0; i < src_size; i+=4) {                                   \
         dst[i + 0] = src[i + a];                                        \
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 6d6154b23b..9d051de4f6 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -32,83 +32,83 @@
 #include "libavutil/avutil.h"
 
 /* A full collection of RGB to RGB(BGR) converters */
-extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15to32)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16to32)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32to16)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32to15)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15to16)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15to32)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16to15)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16to32)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24to16)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24to15)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
 
-void rgb24to32   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb32to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
-void bgr8torgb8  (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb24to32   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb32to24   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16to24   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15to24   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
+void bgr8torgb8  (const uint8_t *src, uint8_t *dst, int src_size);
 
-void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, long src_size);
-void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, long src_size);
-void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, long src_size);
-void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, long src_size);
+void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size);
+void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size);
+void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size);
+void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size);
 
 void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                   uint8_t *vdst, long width, long height, long lumStride,
-                   long chromStride, long srcStride);
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride);
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                          long width, long height,
-                          long lumStride, long chromStride, long dstStride);
+                          int width, int height,
+                          int lumStride, int chromStride, int dstStride);
 
 /**
  * Width should be a multiple of 16.
  */
 extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                             long width, long height,
-                             long lumStride, long chromStride, long dstStride);
+                             int width, int height,
+                             int lumStride, int chromStride, int dstStride);
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                          long width, long height,
-                          long lumStride, long chromStride, long srcStride);
+                          int width, int height,
+                          int lumStride, int chromStride, int srcStride);
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                          long width, long height,
-                          long lumStride, long chromStride, long dstStride);
+                          int width, int height,
+                          int lumStride, int chromStride, int dstStride);
 
 /**
  * Width should be a multiple of 16.
  */
 extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                             long width, long height,
-                             long lumStride, long chromStride, long dstStride);
+                             int width, int height,
+                             int lumStride, int chromStride, int dstStride);
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 2.
@@ -117,40 +117,40 @@ extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uin
  * FIXME: Write high quality version.
  */
 extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           long width, long height,
-                           long lumStride, long chromStride, long srcStride);
-extern void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height,
-                        long srcStride, long dstStride);
+                           int width, int height,
+                           int lumStride, int chromStride, int srcStride);
+extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
+                        int srcStride, int dstStride);
 
 extern void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
-                               long width, long height, long src1Stride,
-                               long src2Stride, long dstStride);
+                               int width, int height, int src1Stride,
+                               int src2Stride, int dstStride);
 
 extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                            uint8_t *dst1, uint8_t *dst2,
-                           long width, long height,
-                           long srcStride1, long srcStride2,
-                           long dstStride1, long dstStride2);
+                           int width, int height,
+                           int srcStride1, int srcStride2,
+                           int dstStride1, int dstStride2);
 
 extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                             uint8_t *dst,
-                            long width, long height,
-                            long srcStride1, long srcStride2,
-                            long srcStride3, long dstStride);
+                            int width, int height,
+                            int srcStride1, int srcStride2,
+                            int srcStride3, int dstStride);
 
 
 extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 extern void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 extern void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 
 void sws_rgb2rgb_init(void);
 
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index fed7e5e195..0a226a2960 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -26,7 +26,7 @@
 
 #include <stddef.h>
 
-static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     uint8_t *dest = dst;
     const uint8_t *s = src;
@@ -50,7 +50,7 @@ static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     uint8_t *dest = dst;
     const uint8_t *s = src;
@@ -81,7 +81,7 @@ static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
  MMX2, 3DNOW optimization by Nick Kurshev
  32-bit C version, and and&add trick by Michael Niedermayer
 */
-static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     register const uint8_t* s=src;
     register uint8_t* d=dst;
@@ -101,7 +101,7 @@ static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     register const uint8_t* s=src;
     register uint8_t* d=dst;
@@ -122,7 +122,7 @@ static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -135,7 +135,7 @@ static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -147,7 +147,7 @@ static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -159,7 +159,7 @@ static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -171,7 +171,7 @@ static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -185,7 +185,7 @@ static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -199,7 +199,7 @@ static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -213,7 +213,7 @@ static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -248,7 +248,7 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
        |
    original bits
 */
-static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -263,7 +263,7 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = (uint8_t *)dst;
@@ -278,7 +278,7 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -301,7 +301,7 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -324,7 +324,7 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     int idx = 15 - src_size;
     const uint8_t *s = src-idx;
@@ -336,7 +336,7 @@ static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     unsigned i;
     for (i=0; i<src_size; i+=3) {
@@ -350,11 +350,11 @@ static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
 
 static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                      const uint8_t *vsrc, uint8_t *dst,
-                                     long width, long height,
-                                     long lumStride, long chromStride,
-                                     long dstStride, long vertLumPerChroma)
+                                     int width, int height,
+                                     int lumStride, int chromStride,
+                                     int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
     const int chromWidth = width >> 1;
     for (y=0; y<height; y++) {
 #if HAVE_FAST_64BIT
@@ -404,9 +404,9 @@ static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                 const uint8_t *vsrc, uint8_t *dst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long dstStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int dstStride)
 {
     //FIXME interpolate chroma
     yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
@@ -415,11 +415,11 @@ static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 
 static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                      const uint8_t *vsrc, uint8_t *dst,
-                                     long width, long height,
-                                     long lumStride, long chromStride,
-                                     long dstStride, long vertLumPerChroma)
+                                     int width, int height,
+                                     int lumStride, int chromStride,
+                                     int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
     const int chromWidth = width >> 1;
     for (y=0; y<height; y++) {
 #if HAVE_FAST_64BIT
@@ -469,9 +469,9 @@ static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                 const uint8_t *vsrc, uint8_t *dst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long dstStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int dstStride)
 {
     //FIXME interpolate chroma
     yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
@@ -483,9 +483,9 @@ static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                    const uint8_t *vsrc, uint8_t *dst,
-                                   long width, long height,
-                                   long lumStride, long chromStride,
-                                   long dstStride)
+                                   int width, int height,
+                                   int lumStride, int chromStride,
+                                   int dstStride)
 {
     yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                       chromStride, dstStride, 1);
@@ -496,9 +496,9 @@ static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                    const uint8_t *vsrc, uint8_t *dst,
-                                   long width, long height,
-                                   long lumStride, long chromStride,
-                                   long dstStride)
+                                   int width, int height,
+                                   int lumStride, int chromStride,
+                                   int dstStride)
 {
     yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                       chromStride, dstStride, 1);
@@ -510,14 +510,14 @@ static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                 uint8_t *udst, uint8_t *vdst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long srcStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int srcStride)
 {
-    long y;
+    int y;
     const int chromWidth = width >> 1;
     for (y=0; y<height; y+=2) {
-        long i;
+        int i;
         for (i=0; i<chromWidth; i++) {
             ydst[2*i+0]     = src[4*i+0];
             udst[i]     = src[4*i+1];
@@ -538,10 +538,10 @@ static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
     }
 }
 
-static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
-                              long srcHeight, long srcStride, long dstStride)
+static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
+                              int srcHeight, int srcStride, int dstStride)
 {
-    long x,y;
+    int x,y;
 
     dst[0]= src[0];
 
@@ -598,14 +598,14 @@ static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
  */
 static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                 uint8_t *udst, uint8_t *vdst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long srcStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int srcStride)
 {
-    long y;
+    int y;
     const int chromWidth = width >> 1;
     for (y=0; y<height; y+=2) {
-        long i;
+        int i;
         for (i=0; i<chromWidth; i++) {
             udst[i]     = src[4*i+0];
             ydst[2*i+0] = src[4*i+1];
@@ -634,14 +634,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
  * FIXME: Write HQ version.
  */
 void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                   uint8_t *vdst, long width, long height, long lumStride,
-                   long chromStride, long srcStride)
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride)
 {
-    long y;
+    int y;
     const int chromWidth = width >> 1;
     y=0;
     for (; y<height; y+=2) {
-        long i;
+        int i;
         for (i=0; i<chromWidth; i++) {
             unsigned int b = src[6*i+0];
             unsigned int g = src[6*i+1];
@@ -689,14 +689,14 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
 }
 
 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
-                              uint8_t *dest, long width,
-                              long height, long src1Stride,
-                              long src2Stride, long dstStride)
+                              uint8_t *dest, int width,
+                              int height, int src1Stride,
+                              int src2Stride, int dstStride)
 {
-    long h;
+    int h;
 
     for (h=0; h < height; h++) {
-        long w;
+        int w;
         for (w=0; w < width; w++) {
             dest[2*w+0] = src1[w];
             dest[2*w+1] = src2[w];
@@ -709,12 +709,12 @@ static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
 
 static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                  uint8_t *dst1, uint8_t *dst2,
-                                 long width, long height,
-                                 long srcStride1, long srcStride2,
-                                 long dstStride1, long dstStride2)
+                                 int width, int height,
+                                 int srcStride1, int srcStride2,
+                                 int dstStride1, int dstStride2)
 {
     int y;
-    long x,w,h;
+    int x,w,h;
     w=width/2; h=height/2;
     for (y=0;y<h;y++) {
         const uint8_t* s1=src1+srcStride1*(y>>1);
@@ -732,12 +732,12 @@ static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
 
 static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                   const uint8_t *src3, uint8_t *dst,
-                                  long width, long height,
-                                  long srcStride1, long srcStride2,
-                                  long srcStride3, long dstStride)
+                                  int width, int height,
+                                  int srcStride1, int srcStride2,
+                                  int srcStride3, int dstStride)
 {
     int x;
-    long y,w,h;
+    int y,w,h;
     w=width/2; h=height;
     for (y=0;y<h;y++) {
         const uint8_t* yp=src1+srcStride1*y;
@@ -746,7 +746,7 @@ static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
         uint8_t* d=dst+dstStride*y;
         x=0;
         for (; x<w; x++) {
-            const long x2 = x<<2;
+            const int x2 = x<<2;
             d[8*x+0] = yp[x2];
             d[8*x+1] = up[x];
             d[8*x+2] = yp[x2+1];
@@ -833,11 +833,11 @@ static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
 }
 
 static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         extract_even_c(src, ydst, width);
@@ -853,11 +853,11 @@ static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 }
 
 static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         extract_even_c(src, ydst, width);
@@ -871,11 +871,11 @@ static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 }
 
 static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         extract_even_c(src + 1, ydst, width);
@@ -891,11 +891,11 @@ static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 }
 
 static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         extract_even_c(src + 1, ydst, width);
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 54a75971be..02cf7cb382 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -980,7 +980,7 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
     }
 }
 
-static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, long width,
+static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, int width,
                             uint32_t *unused)
 {
     int i;
@@ -995,7 +995,7 @@ static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, long width,
 
 static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
-                             long width, uint32_t *unused)
+                             int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -1011,7 +1011,7 @@ static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
 
 static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
                                   const uint8_t *src1, const uint8_t *src2,
-                                  long width, uint32_t *unused)
+                                  int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -1025,7 +1025,7 @@ static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
     }
 }
 
-static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, long width,
+static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, int width,
                             uint32_t *unused)
 {
     int i;
@@ -1040,7 +1040,7 @@ static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, long width,
 
 static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
-                             long width, uint32_t *unused)
+                             int width, uint32_t *unused)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -1055,7 +1055,7 @@ static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
 
 static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
                                   const uint8_t *src1, const uint8_t *src2,
-                                  long width, uint32_t *unused)
+                                  int width, uint32_t *unused)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -1069,7 +1069,7 @@ static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
 }
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\
+static inline void name(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1090,7 +1090,7 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
 
-static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void abgrToA(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1099,7 +1099,7 @@ static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_
 }
 
 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
-static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1111,7 +1111,7 @@ static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const
         dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
     }\
 }\
-static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1138,7 +1138,7 @@ BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<
 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
 
-static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
+static inline void palToY(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1150,7 +1150,7 @@ static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t
 
 static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
                            const uint8_t *src1, const uint8_t *src2,
-                           long width, uint32_t *pal)
+                           int width, uint32_t *pal)
 {
     int i;
     assert(src1 == src2);
@@ -1162,7 +1162,7 @@ static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
     }
 }
 
-static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
@@ -1172,7 +1172,7 @@ static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uin
     }
 }
 
-static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
@@ -1365,24 +1365,24 @@ static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStrid
     return srcSliceH;
 }
 
-static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
     for (i=0; i<num_pixels; i++)
         ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | (src[(i<<1)+1] << 24);
 }
 
-static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
 
     for (i=0; i<num_pixels; i++)
         ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | src[(i<<1)+1];
 }
 
-static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
 
     for (i=0; i<num_pixels; i++) {
         //FIXME slow?
@@ -1398,7 +1398,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
 {
     const enum PixelFormat srcFormat= c->srcFormat;
     const enum PixelFormat dstFormat= c->dstFormat;
-    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
+    void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels,
                  const uint8_t *palette)=NULL;
     int i;
     uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
@@ -1455,7 +1455,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
     const int dstBpp= (c->dstFormatBpp + 7) >> 3;
     const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
     const int dstId= c->dstFormatBpp >> 2;
-    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
+    void (*conv)(const uint8_t *src, uint8_t *dst, int src_size)=NULL;
 
 #define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst)
 
@@ -2057,18 +2057,18 @@ int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[],
 }
 
 /* Convert the palette to the same packed 32-bit format as the palette */
-void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
 
     for (i=0; i<num_pixels; i++)
         ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]];
 }
 
 /* Palette format: ABCD -> dst format: ABC */
-void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
 
     for (i=0; i<num_pixels; i++) {
         //FIXME slow?
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 6ab59c6a59..b0ad912a08 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -335,7 +335,7 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context,
  * @param num_pixels number of pixels to convert
  * @param palette    array with [256] entries, which must match color arrangement (RGB or BGR) of src
  */
-void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette);
 
 /**
  * Converts an 8bit paletted frame into a frame with a color depth of 24 bits.
@@ -347,7 +347,7 @@ void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pi
  * @param num_pixels number of pixels to convert
  * @param palette    array with [256] entries, which must match color arrangement (RGB or BGR) of src
  */
-void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette);
 
 
 #endif /* SWSCALE_SWSCALE_H */
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index d41828a4ca..c17d55023d 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -261,7 +261,7 @@ typedef struct SwsContext {
                         const int16_t *chrVSrc, const int16_t *alpSrc,
                         uint8_t *dest,
                         uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        long dstW, long chrDstW);
+                        int dstW, int chrDstW);
     void (*yuv2yuvX   )(struct SwsContext *c,
                         const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                         const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -269,7 +269,7 @@ typedef struct SwsContext {
                         const int16_t **alpSrc,
                         uint8_t *dest,
                         uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        long dstW, long chrDstW);
+                        int dstW, int chrDstW);
     void (*yuv2packed1)(struct SwsContext *c,
                         const uint16_t *buf0,
                         const uint16_t *ubuf0, const uint16_t *ubuf1,
@@ -289,26 +289,26 @@ typedef struct SwsContext {
                         const int16_t *chrFilter, const int16_t **chrUSrc,
                         const int16_t **chrVSrc, int chrFilterSize,
                         const int16_t **alpSrc, uint8_t *dest,
-                        long dstW, long dstY);
+                        int dstW, int dstY);
 
     void (*lumToYV12)(uint8_t *dst, const uint8_t *src,
-                      long width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.
+                      int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.
     void (*alpToYV12)(uint8_t *dst, const uint8_t *src,
-                      long width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler.
+                      int width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler.
     void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
-                      long width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
+                      int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
     void (*hyscale_fast)(struct SwsContext *c,
-                         int16_t *dst, long dstWidth,
+                         int16_t *dst, int dstWidth,
                          const uint8_t *src, int srcW, int xInc);
     void (*hcscale_fast)(struct SwsContext *c,
-                         int16_t *dst1, int16_t *dst2, long dstWidth,
+                         int16_t *dst1, int16_t *dst2, int dstWidth,
                          const uint8_t *src1, const uint8_t *src2,
                          int srcW, int xInc);
 
     void (*hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW,
                    int xInc, const int16_t *filter, const int16_t *filterPos,
-                   long filterSize);
+                   int filterSize);
 
     void (*lumConvertRange)(uint16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
     void (*chrConvertRange)(uint16_t *dst1, uint16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 29836f123a..d05b9a1929 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -24,7 +24,7 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                               const int16_t **chrVSrc,
                               int chrFilterSize, const int16_t **alpSrc,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, long dstW, long chrDstW)
+                              uint8_t *aDest, int dstW, int chrDstW)
 {
     yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
                 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
@@ -47,7 +47,7 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
                               const int16_t *chrUSrc, const int16_t *chrVSrc,
                               const int16_t *alpSrc,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, long dstW, long chrDstW)
+                              uint8_t *aDest, int dstW, int chrDstW)
 {
     int i;
     for (i=0; i<dstW; i++) {
@@ -79,7 +79,7 @@ static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                                  const int16_t *chrFilter, const int16_t **chrUSrc,
                                  const int16_t **chrVSrc,
                                  int chrFilterSize, const int16_t **alpSrc,
-                                 uint8_t *dest, long dstW, long dstY)
+                                 uint8_t *dest, int dstW, int dstY)
 {
         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
                        chrFilter, chrUSrc, chrVSrc, chrFilterSize,
@@ -128,7 +128,7 @@ static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
 
 //FIXME yuy2* can read up to 7 samples too much
 
-static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, long width,
+static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                              uint32_t *unused)
 {
     int i;
@@ -137,7 +137,7 @@ static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, long width,
 }
 
 static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                              const uint8_t *src2, long width, uint32_t *unused)
+                              const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -148,7 +148,7 @@ static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 }
 
 static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                            const uint8_t *src2, long width, uint32_t *unused)
+                            const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
@@ -161,7 +161,7 @@ static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, long width,
+static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                              uint32_t *unused)
 {
     int i;
@@ -170,7 +170,7 @@ static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, long width,
 }
 
 static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                              const uint8_t *src2, long width, uint32_t *unused)
+                              const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -181,7 +181,7 @@ static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 }
 
 static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                            const uint8_t *src2, long width, uint32_t *unused)
+                            const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -191,7 +191,7 @@ static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 }
 
 static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
-                              const uint8_t *src, long width)
+                              const uint8_t *src, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -202,14 +202,14 @@ static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
 
 static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                               const uint8_t *src1, const uint8_t *src2,
-                              long width, uint32_t *unused)
+                              int width, uint32_t *unused)
 {
     nvXXtoUV_c(dstU, dstV, src1, width);
 }
 
 static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                               const uint8_t *src1, const uint8_t *src2,
-                              long width, uint32_t *unused)
+                              int width, uint32_t *unused)
 {
     nvXXtoUV_c(dstV, dstU, src1, width);
 }
@@ -218,7 +218,7 @@ static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
 #define YUV_NBPS(depth, endianness, rfunc) \
 static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                           const uint8_t *_srcU, const uint8_t *_srcV, \
-                                          long width, uint32_t *unused) \
+                                          int width, uint32_t *unused) \
 { \
     int i; \
     const uint16_t *srcU = (const uint16_t*)_srcU; \
@@ -229,7 +229,7 @@ static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
     } \
 } \
 \
-static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, long width, uint32_t *unused) \
+static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, int width, uint32_t *unused) \
 { \
     int i; \
     const uint16_t *srcY = (const uint16_t*)_srcY; \
@@ -243,7 +243,7 @@ YUV_NBPS(10, LE, AV_RL16)
 YUV_NBPS(10, BE, AV_RB16)
 
 static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
-                              long width, uint32_t *unused)
+                              int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -256,7 +256,7 @@ static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
 }
 
 static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                               const uint8_t *src2, long width, uint32_t *unused)
+                               const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -271,7 +271,7 @@ static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1
 }
 
 static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                                    const uint8_t *src2, long width, uint32_t *unused)
+                                    const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -285,7 +285,7 @@ static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
+static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
                               uint32_t *unused)
 {
     int i;
@@ -299,7 +299,7 @@ static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
 }
 
 static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                               const uint8_t *src2, long width, uint32_t *unused)
+                               const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -314,7 +314,7 @@ static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1
 }
 
 static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                                    const uint8_t *src2, long width, uint32_t *unused)
+                                    const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -333,7 +333,7 @@ static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t
 static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                             int srcW, int xInc,
                             const int16_t *filter, const int16_t *filterPos,
-                            long filterSize)
+                            int filterSize)
 {
     int i;
     for (i=0; i<dstW; i++) {
@@ -380,7 +380,7 @@ static void lumRangeFromJpeg_c(uint16_t *dst, int width)
         dst[i] = (dst[i]*14071 + 33561947)>>14;
 }
 
-static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, long dstWidth,
+static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                                   const uint8_t *src, int srcW, int xInc)
 {
     int i;
@@ -394,14 +394,14 @@ static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, long dstWidth,
 }
 
       // *** horizontal scale Y line to temp buffer
-static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
+static inline void hyscale_c(SwsContext *c, uint16_t *dst, int dstWidth,
                              const uint8_t *src, int srcW, int xInc,
                              const int16_t *hLumFilter,
                              const int16_t *hLumFilterPos, int hLumFilterSize,
                              uint8_t *formatConvBuffer,
                              uint32_t *pal, int isAlpha)
 {
-    void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
+    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
     void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
 
     src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
@@ -422,7 +422,7 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
 }
 
 static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                  long dstWidth, const uint8_t *src1,
+                                  int dstWidth, const uint8_t *src1,
                                   const uint8_t *src2, int srcW, int xInc)
 {
     int i;
@@ -436,7 +436,7 @@ static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
     }
 }
 
-inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, long dstWidth,
+inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
                              const uint8_t *src1, const uint8_t *src2,
                              int srcW, int xInc, const int16_t *hChrFilter,
                              const int16_t *hChrFilterPos, int hChrFilterSize,
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 70673f75d1..c255610193 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -61,7 +61,7 @@
 
 #if !COMPILE_TEMPLATE_AMD3DNOW
 
-static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     uint8_t *dest = dst;
     const uint8_t *s = src;
@@ -143,7 +143,7 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s
             MOVNTQ"     %%mm4, 16%0"
 
 
-static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     uint8_t *dest = dst;
     const uint8_t *s = src;
@@ -186,7 +186,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s
  MMX2, 3DNOW optimization by Nick Kurshev
  32-bit C version, and and&add trick by Michael Niedermayer
 */
-static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     register const uint8_t* s=src;
     register uint8_t* d=dst;
@@ -230,7 +230,7 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     register const uint8_t* s=src;
     register uint8_t* d=dst;
@@ -279,7 +279,7 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -371,7 +371,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -426,7 +426,7 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -518,7 +518,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -573,7 +573,7 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -630,7 +630,7 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -687,7 +687,7 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -744,7 +744,7 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -822,7 +822,7 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_
        |
    original bits
 */
-static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     const uint16_t *mm_end;
@@ -925,7 +925,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     const uint16_t *mm_end;
@@ -1046,7 +1046,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s
     MOVNTQ"     %%mm0,  %0      \n\t"                               \
     MOVNTQ"     %%mm3, 8%0      \n\t"                               \
 
-static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     const uint16_t *mm_end;
@@ -1088,7 +1088,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     const uint16_t *mm_end;
@@ -1130,7 +1130,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     x86_reg idx = 15 - src_size;
     const uint8_t *s = src-idx;
@@ -1192,7 +1192,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst,
     }
 }
 
-static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     unsigned i;
     x86_reg mmx_size= 23 - src_size;
@@ -1260,10 +1260,10 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
 }
 
 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           long width, long height,
-                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+                                           int width, int height,
+                                           int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y++) {
         //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
@@ -1317,18 +1317,18 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long dstStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int dstStride)
 {
     //FIXME interpolate chroma
     RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
 }
 
 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           long width, long height,
-                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+                                           int width, int height,
+                                           int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y++) {
         //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
@@ -1382,8 +1382,8 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long dstStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int dstStride)
 {
     //FIXME interpolate chroma
     RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
@@ -1393,8 +1393,8 @@ static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc,
  * Width should be a multiple of 16.
  */
 static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         long width, long height,
-                                         long lumStride, long chromStride, long dstStride)
+                                         int width, int height,
+                                         int lumStride, int chromStride, int dstStride)
 {
     RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
 }
@@ -1403,8 +1403,8 @@ static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usr
  * Width should be a multiple of 16.
  */
 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         long width, long height,
-                                         long lumStride, long chromStride, long dstStride)
+                                         int width, int height,
+                                         int lumStride, int chromStride, int dstStride)
 {
     RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
 }
@@ -1414,10 +1414,10 @@ static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usr
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int srcStride)
 {
-    long y;
+    int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y+=2) {
         __asm__ volatile(
@@ -1513,9 +1513,9 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
-static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
+static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
 {
-    long x,y;
+    int x,y;
 
     dst[0]= src[0];
 
@@ -1612,10 +1612,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
  * FIXME: Write HQ version.
  */
 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int srcStride)
 {
-    long y;
+    int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y+=2) {
         __asm__ volatile(
@@ -1718,13 +1718,13 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
  * FIXME: Write HQ version.
  */
 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                       long width, long height,
-                                       long lumStride, long chromStride, long srcStride)
+                                       int width, int height,
+                                       int lumStride, int chromStride, int srcStride)
 {
-    long y;
+    int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height-2; y+=2) {
-        long i;
+        int i;
         for (i=0; i<2; i++) {
             __asm__ volatile(
                 "mov                        %2, %%"REG_a"   \n\t"
@@ -1963,13 +1963,13 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
 
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
-                                    long width, long height, long src1Stride,
-                                    long src2Stride, long dstStride)
+                                    int width, int height, int src1Stride,
+                                    int src2Stride, int dstStride)
 {
-    long h;
+    int h;
 
     for (h=0; h < height; h++) {
-        long w;
+        int w;
 
 #if COMPILE_TEMPLATE_SSE2
         __asm__(
@@ -2037,12 +2037,12 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                        uint8_t *dst1, uint8_t *dst2,
-                                       long width, long height,
-                                       long srcStride1, long srcStride2,
-                                       long dstStride1, long dstStride2)
+                                       int width, int height,
+                                       int srcStride1, int srcStride2,
+                                       int dstStride1, int dstStride2)
 {
     x86_reg y;
-    long x,w,h;
+    int x,w,h;
     w=width/2; h=height/2;
     __asm__ volatile(
         PREFETCH" %0    \n\t"
@@ -2131,12 +2131,12 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
 
 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                                         uint8_t *dst,
-                                        long width, long height,
-                                        long srcStride1, long srcStride2,
-                                        long srcStride3, long dstStride)
+                                        int width, int height,
+                                        int srcStride1, int srcStride2,
+                                        int srcStride3, int dstStride)
 {
     x86_reg x;
-    long y,w,h;
+    int y,w,h;
     w=width/2; h=height;
     for (y=0;y<h;y++) {
         const uint8_t* yp=src1+srcStride1*y;
@@ -2197,7 +2197,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
                 :"memory");
         }
         for (; x<w; x++) {
-            const long x2 = x<<2;
+            const int x2 = x<<2;
             d[8*x+0] = yp[x2];
             d[8*x+1] = up[x];
             d[8*x+2] = yp[x2+1];
@@ -2459,11 +2459,11 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui
 }
 
 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src, ydst, width);
@@ -2485,11 +2485,11 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
 
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src, ydst, width);
@@ -2509,11 +2509,11 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src+1, ydst, width);
@@ -2535,11 +2535,11 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
 
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src+1, ydst, width);
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index ba0a1978cc..4ac59d5fb3 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -78,7 +78,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                     const int16_t **chrVSrc,
                                     int chrFilterSize, const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, long dstW, long chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW)
 {
     if (uDest) {
         x86_reg uv_off = c->uv_off;
@@ -159,7 +159,7 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
                                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, long dstW, long chrDstW)
+                                       uint8_t *aDest, int dstW, int chrDstW)
 {
     if (uDest) {
         x86_reg uv_off = c->uv_off;
@@ -190,9 +190,9 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
                                     const int16_t *chrUSrc, const int16_t *chrVSrc,
                                     const int16_t *alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, long dstW, long chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW)
 {
-    long p= 4;
+    int p= 4;
     const uint8_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
@@ -231,9 +231,9 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
                                        const int16_t *chrUSrc, const int16_t *chrVSrc,
                                        const int16_t *alpSrc,
                                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, long dstW, long chrDstW)
+                                       uint8_t *aDest, int dstW, int chrDstW)
 {
-    long p= 4;
+    int p= 4;
     const uint8_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
@@ -471,7 +471,7 @@ static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilte
                                           const int16_t *chrFilter, const int16_t **chrUSrc,
                                           const int16_t **chrVSrc,
                                           int chrFilterSize, const int16_t **alpSrc,
-                                          uint8_t *dest, long dstW, long dstY)
+                                          uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -504,7 +504,7 @@ static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t *chrFilter, const int16_t **chrUSrc,
                                        const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, long dstW, long dstY)
+                                       uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -561,7 +561,7 @@ static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilt
                                            const int16_t *chrFilter, const int16_t **chrUSrc,
                                            const int16_t **chrVSrc,
                                            int chrFilterSize, const int16_t **alpSrc,
-                                           uint8_t *dest, long dstW, long dstY)
+                                           uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -585,7 +585,7 @@ static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
                                         const int16_t *chrFilter, const int16_t **chrUSrc,
                                         const int16_t **chrVSrc,
                                         int chrFilterSize, const int16_t **alpSrc,
-                                        uint8_t *dest, long dstW, long dstY)
+                                        uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -638,7 +638,7 @@ static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilt
                                            const int16_t *chrFilter, const int16_t **chrUSrc,
                                            const int16_t **chrVSrc,
                                            int chrFilterSize, const int16_t **alpSrc,
-                                           uint8_t *dest, long dstW, long dstY)
+                                           uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -662,7 +662,7 @@ static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
                                         const int16_t *chrFilter, const int16_t **chrUSrc,
                                         const int16_t **chrVSrc,
                                         int chrFilterSize, const int16_t **alpSrc,
-                                        uint8_t *dest, long dstW, long dstY)
+                                        uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -795,7 +795,7 @@ static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilte
                                           const int16_t *chrFilter, const int16_t **chrUSrc,
                                           const int16_t **chrVSrc,
                                           int chrFilterSize, const int16_t **alpSrc,
-                                          uint8_t *dest, long dstW, long dstY)
+                                          uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -819,7 +819,7 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t *chrFilter, const int16_t **chrUSrc,
                                        const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, long dstW, long dstY)
+                                       uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -860,7 +860,7 @@ static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFil
                                             const int16_t *chrFilter, const int16_t **chrUSrc,
                                             const int16_t **chrVSrc,
                                             int chrFilterSize, const int16_t **alpSrc,
-                                            uint8_t *dest, long dstW, long dstY)
+                                            uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -881,7 +881,7 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
                                          const int16_t *chrFilter, const int16_t **chrUSrc,
                                          const int16_t **chrVSrc,
                                          int chrFilterSize, const int16_t **alpSrc,
-                                         uint8_t *dest, long dstW, long dstY)
+                                         uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -1578,7 +1578,7 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
 #if !COMPILE_TEMPLATE_MMX2
 //FIXME yuy2* can read up to 7 samples too much
 
-static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm2           \n\t"
@@ -1597,7 +1597,7 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width,
     );
 }
 
-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1623,7 +1623,7 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "mov                    %0, %%"REG_a"       \n\t"
@@ -1649,7 +1649,7 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "mov                  %0, %%"REG_a"         \n\t"
@@ -1667,7 +1667,7 @@ static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width,
     );
 }
 
-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1693,7 +1693,7 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1719,7 +1719,7 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
 }
 
 static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
-                                    const uint8_t *src, long width)
+                                    const uint8_t *src, int width)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1746,20 +1746,20 @@ static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
 
 static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
                                     const uint8_t *src1, const uint8_t *src2,
-                                    long width, uint32_t *unused)
+                                    int width, uint32_t *unused)
 {
     RENAME(nvXXtoUV)(dstU, dstV, src1, width);
 }
 
 static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
                                     const uint8_t *src1, const uint8_t *src2,
-                                    long width, uint32_t *unused)
+                                    int width, uint32_t *unused)
 {
     RENAME(nvXXtoUV)(dstV, dstU, src1, width);
 }
 #endif /* !COMPILE_TEMPLATE_MMX2 */
 
-static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, int width, enum PixelFormat srcFormat)
 {
 
     if(srcFormat == PIX_FMT_BGR24) {
@@ -1812,7 +1812,7 @@ static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long w
     );
 }
 
-static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, int width, enum PixelFormat srcFormat)
 {
     __asm__ volatile(
         "movq                    24(%4), %%mm6       \n\t"
@@ -1870,23 +1870,23 @@ static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uin
     );
 }
 
-static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
 }
 
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
     assert(src1 == src2);
 }
 
-static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
 }
 
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     assert(src1==src2);
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
@@ -1895,7 +1895,7 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
 #if !COMPILE_TEMPLATE_MMX2
 // bilinear / bicubic scaling
 static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
-                                  const int16_t *filter, const int16_t *filterPos, long filterSize)
+                                  const int16_t *filter, const int16_t *filterPos, int filterSize)
 {
     assert(filterSize % 4 == 0 && filterSize>0);
     if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
@@ -2051,7 +2051,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
 
 #if COMPILE_TEMPLATE_MMX2
 static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
-                                        long dstWidth, const uint8_t *src, int srcW,
+                                        int dstWidth, const uint8_t *src, int srcW,
                                         int xInc)
 {
     int32_t *filterPos = c->hLumFilterPos;
@@ -2123,7 +2123,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
 }
 
 static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                        long dstWidth, const uint8_t *src1,
+                                        int dstWidth, const uint8_t *src1,
                                         const uint8_t *src2, int srcW, int xInc)
 {
     int32_t *filterPos = c->hChrFilterPos;

From e01e05ee66f7ea26dd7574501e329aa5b8e788ef Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 28 May 2011 15:46:55 +0100
Subject: [PATCH 417/830] get_bits: add av_unused tag to cache variable

This silences numerous compiler warnings from skip_bits(),
where the cache variable is not used.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/get_bits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 185ff316bb..4136498c71 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -127,7 +127,7 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
 
 #   define OPEN_READER(name, gb)                \
     unsigned int name##_index = (gb)->index;    \
-    unsigned int name##_cache = 0
+    unsigned int av_unused name##_cache = 0
 
 #   define CLOSE_READER(name, gb) (gb)->index = name##_index
 

From eb8bc57240d5d3e4680ff1df18a0a7792e96bd0c Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 24 May 2011 18:14:42 +0200
Subject: [PATCH 418/830] cmdutils: remove OPT_FUNC2

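Make ff* tools only accept opt_* functions taking two arguments (the
option name and its argument) and returning an int error code.

The distinction between functions with one and two arguments is quite
pointless. Simplify parse_options() code: it now prints an error and
exits whenever an option function returns a negative value, so several
converted functions return AVERROR(EINVAL) instead of calling
ffmpeg_exit(1) themselves.

The OPT_* flag values that followed the removed OPT_FUNC2 are
renumbered accordingly.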
---
 cmdutils.c             |  10 +-
 cmdutils.h             |  12 +--
 cmdutils_common_opts.h |   2 +-
 ffmpeg.c               | 215 ++++++++++++++++++++++-------------------
 ffplay.c               |  35 ++++---
 ffprobe.c              |   7 +-
 6 files changed, 148 insertions(+), 133 deletions(-)

diff --git a/cmdutils.c b/cmdutils.c
index 8c9542accb..2bc6b7417f 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -273,15 +273,13 @@ unknown_opt:
                 *po->u.int64_arg = parse_number_or_die(opt, arg, OPT_INT64, INT64_MIN, INT64_MAX);
             } else if (po->flags & OPT_FLOAT) {
                 *po->u.float_arg = parse_number_or_die(opt, arg, OPT_FLOAT, -INFINITY, INFINITY);
-            } else if (po->flags & OPT_FUNC2) {
-                if (po->u.func2_arg(opt, arg) < 0) {
-                    fprintf(stderr, "%s: failed to set value '%s' for option '%s'\n", argv[0], arg, opt);
-                    exit(1);
-                }
             } else if (po->flags & OPT_DUMMY) {
                 /* Do nothing for this option */
             } else {
-                po->u.func_arg(arg);
+                if (po->u.func_arg(opt, arg) < 0) {
+                    fprintf(stderr, "%s: failed to set value '%s' for option '%s'\n", argv[0], arg, opt);
+                    exit(1);
+                }
             }
             if(po->flags & OPT_EXIT)
                 exit(0);
diff --git a/cmdutils.h b/cmdutils.h
index 9e5827fc2b..eea44018b6 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -121,17 +121,15 @@ typedef struct {
 #define OPT_INT    0x0080
 #define OPT_FLOAT  0x0100
 #define OPT_SUBTITLE 0x0200
-#define OPT_FUNC2  0x0400
-#define OPT_INT64  0x0800
-#define OPT_EXIT   0x1000
-#define OPT_DATA   0x2000
-#define OPT_DUMMY  0x4000
+#define OPT_INT64  0x0400
+#define OPT_EXIT   0x0800
+#define OPT_DATA   0x1000
+#define OPT_DUMMY  0x2000
      union {
-        void (*func_arg)(const char *); //FIXME passing error code as int return would be nicer then exit() in the func
         int *int_arg;
         char **str_arg;
         float *float_arg;
-        int (*func2_arg)(const char *, const char *);
+        int (*func_arg)(const char *, const char *);
         int64_t *int64_arg;
     } u;
     const char *help;
diff --git a/cmdutils_common_opts.h b/cmdutils_common_opts.h
index da309977bd..9b5e5d22cd 100644
--- a/cmdutils_common_opts.h
+++ b/cmdutils_common_opts.h
@@ -10,4 +10,4 @@
     { "protocols", OPT_EXIT, {(void*)show_protocols}, "show available protocols" },
     { "filters",   OPT_EXIT, {(void*)show_filters  }, "show available filters" },
     { "pix_fmts" , OPT_EXIT, {(void*)show_pix_fmts }, "show available pixel formats" },
-    { "loglevel", HAS_ARG | OPT_FUNC2, {(void*)opt_loglevel}, "set libav* logging level", "loglevel" },
+    { "loglevel", HAS_ARG, {(void*)opt_loglevel}, "set libav* logging level", "loglevel" },
diff --git a/ffmpeg.c b/ffmpeg.c
index b903f86c44..8659b1f5ba 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -2810,14 +2810,16 @@ static int transcode(AVFormatContext **output_files,
     return ret;
 }
 
-static void opt_format(const char *arg)
+static int opt_format(const char *opt, const char *arg)
 {
     last_asked_format = arg;
+    return 0;
 }
 
-static void opt_video_rc_override_string(const char *arg)
+static int opt_video_rc_override_string(const char *opt, const char *arg)
 {
     video_rc_override_string = arg;
+    return 0;
 }
 
 static int opt_me_threshold(const char *opt, const char *arg)
@@ -2859,12 +2861,13 @@ static int opt_frame_crop(const char *opt, const char *arg)
     return AVERROR(EINVAL);
 }
 
-static void opt_frame_size(const char *arg)
+static int opt_frame_size(const char *opt, const char *arg)
 {
     if (av_parse_video_size(&frame_width, &frame_height, arg) < 0) {
         fprintf(stderr, "Incorrect frame size\n");
-        ffmpeg_exit(1);
+        return AVERROR(EINVAL);
     }
+    return 0;
 }
 
 static int opt_pad(const char *opt, const char *arg) {
@@ -2872,21 +2875,22 @@ static int opt_pad(const char *opt, const char *arg) {
     return -1;
 }
 
-static void opt_frame_pix_fmt(const char *arg)
+static int opt_frame_pix_fmt(const char *opt, const char *arg)
 {
     if (strcmp(arg, "list")) {
         frame_pix_fmt = av_get_pix_fmt(arg);
         if (frame_pix_fmt == PIX_FMT_NONE) {
             fprintf(stderr, "Unknown pixel format requested: %s\n", arg);
-            ffmpeg_exit(1);
+            return AVERROR(EINVAL);
         }
     } else {
         show_pix_fmts();
         ffmpeg_exit(0);
     }
+    return 0;
 }
 
-static void opt_frame_aspect_ratio(const char *arg)
+static int opt_frame_aspect_ratio(const char *opt, const char *arg)
 {
     int x = 0, y = 0;
     double ar = 0;
@@ -2905,9 +2909,10 @@ static void opt_frame_aspect_ratio(const char *arg)
 
     if (!ar) {
         fprintf(stderr, "Incorrect aspect ratio specification.\n");
-        ffmpeg_exit(1);
+        return AVERROR(EINVAL);
     }
     frame_aspect_ratio = ar;
+    return 0;
 }
 
 static int opt_metadata(const char *opt, const char *arg)
@@ -2952,13 +2957,13 @@ static int opt_thread_count(const char *opt, const char *arg)
     return 0;
 }
 
-static void opt_audio_sample_fmt(const char *arg)
+static int opt_audio_sample_fmt(const char *opt, const char *arg)
 {
     if (strcmp(arg, "list")) {
         audio_sample_fmt = av_get_sample_fmt(arg);
         if (audio_sample_fmt == AV_SAMPLE_FMT_NONE) {
             av_log(NULL, AV_LOG_ERROR, "Invalid sample format '%s'\n", arg);
-            ffmpeg_exit(1);
+            return AVERROR(EINVAL);
         }
     } else {
         int i;
@@ -2967,6 +2972,7 @@ static void opt_audio_sample_fmt(const char *arg)
             printf("%s\n", av_get_sample_fmt_string(fmt_str, sizeof(fmt_str), i));
         ffmpeg_exit(0);
     }
+    return 0;
 }
 
 static int opt_audio_rate(const char *opt, const char *arg)
@@ -2987,12 +2993,13 @@ static int opt_video_channel(const char *opt, const char *arg)
     return 0;
 }
 
-static void opt_video_standard(const char *arg)
+static int opt_video_standard(const char *opt, const char *arg)
 {
     video_standard = av_strdup(arg);
+    return 0;
 }
 
-static void opt_codec(int *pstream_copy, char **pcodec_name,
+static int opt_codec(int *pstream_copy, char **pcodec_name,
                       int codec_type, const char *arg)
 {
     av_freep(pcodec_name);
@@ -3001,26 +3008,27 @@ static void opt_codec(int *pstream_copy, char **pcodec_name,
     } else {
         *pcodec_name = av_strdup(arg);
     }
+    return 0;
 }
 
-static void opt_audio_codec(const char *arg)
+static int opt_audio_codec(const char *opt, const char *arg)
 {
-    opt_codec(&audio_stream_copy, &audio_codec_name, AVMEDIA_TYPE_AUDIO, arg);
+    return opt_codec(&audio_stream_copy, &audio_codec_name, AVMEDIA_TYPE_AUDIO, arg);
 }
 
-static void opt_video_codec(const char *arg)
+static int opt_video_codec(const char *opt, const char *arg)
 {
-    opt_codec(&video_stream_copy, &video_codec_name, AVMEDIA_TYPE_VIDEO, arg);
+    return opt_codec(&video_stream_copy, &video_codec_name, AVMEDIA_TYPE_VIDEO, arg);
 }
 
-static void opt_subtitle_codec(const char *arg)
+static int opt_subtitle_codec(const char *opt, const char *arg)
 {
-    opt_codec(&subtitle_stream_copy, &subtitle_codec_name, AVMEDIA_TYPE_SUBTITLE, arg);
+    return opt_codec(&subtitle_stream_copy, &subtitle_codec_name, AVMEDIA_TYPE_SUBTITLE, arg);
 }
 
-static void opt_data_codec(const char *arg)
+static int opt_data_codec(const char *opt, const char *arg)
 {
-    opt_codec(&data_stream_copy, &data_codec_name, AVMEDIA_TYPE_DATA, arg);
+    return opt_codec(&data_stream_copy, &data_codec_name, AVMEDIA_TYPE_DATA, arg);
 }
 
 static int opt_codec_tag(const char *opt, const char *arg)
@@ -3041,7 +3049,7 @@ static int opt_codec_tag(const char *opt, const char *arg)
     return 0;
 }
 
-static void opt_map(const char *arg)
+static int opt_map(const char *opt, const char *arg)
 {
     AVStreamMap *m;
     char *p;
@@ -3064,6 +3072,7 @@ static void opt_map(const char *arg)
         m->sync_file_index = m->file_index;
         m->sync_stream_index = m->stream_index;
     }
+    return 0;
 }
 
 static void parse_meta_type(char *arg, char *type, int *index, char **endptr)
@@ -3087,7 +3096,7 @@ static void parse_meta_type(char *arg, char *type, int *index, char **endptr)
         *type = 'g';
 }
 
-static void opt_map_metadata(const char *arg)
+static int opt_map_metadata(const char *opt, const char *arg)
 {
     AVMetaDataMap *m, *m1;
     char *p;
@@ -3111,16 +3120,18 @@ static void opt_map_metadata(const char *arg)
         metadata_streams_autocopy = 0;
     if (m->type == 'c' || m1->type == 'c')
         metadata_chapters_autocopy = 0;
+
+    return 0;
 }
 
-static void opt_map_meta_data(const char *arg)
+static int opt_map_meta_data(const char *opt, const char *arg)
 {
     fprintf(stderr, "-map_meta_data is deprecated and will be removed soon. "
                     "Use -map_metadata instead.\n");
-    opt_map_metadata(arg);
+    return opt_map_metadata(opt, arg);
 }
 
-static void opt_map_chapters(const char *arg)
+static int opt_map_chapters(const char *opt, const char *arg)
 {
     AVChapterMap *c;
     char *p;
@@ -3133,9 +3144,10 @@ static void opt_map_chapters(const char *arg)
         p++;
 
     c->in_file = strtol(p, &p, 0);
+    return 0;
 }
 
-static void opt_input_ts_scale(const char *arg)
+static int opt_input_ts_scale(const char *opt, const char *arg)
 {
     unsigned int stream;
     double scale;
@@ -3151,6 +3163,7 @@ static void opt_input_ts_scale(const char *arg)
 
     input_files_ts_scale[nb_input_files] = grow_array(input_files_ts_scale[nb_input_files], sizeof(*input_files_ts_scale[nb_input_files]), &nb_input_files_ts_scale[nb_input_files], stream + 1);
     input_files_ts_scale[nb_input_files][stream]= scale;
+    return 0;
 }
 
 static int opt_recording_time(const char *opt, const char *arg)
@@ -3211,7 +3224,7 @@ static enum CodecID find_codec_or_die(const char *name, int type, int encoder, i
     return codec->id;
 }
 
-static void opt_input_file(const char *filename)
+static int opt_input_file(const char *opt, const char *filename)
 {
     AVFormatContext *ic;
     AVFormatParameters params, *ap = &params;
@@ -3433,6 +3446,7 @@ static void opt_input_file(const char *filename)
     av_freep(&subtitle_codec_name);
     uninit_opts();
     init_opts();
+    return 0;
 }
 
 static void check_inputs(int *has_video_ptr,
@@ -4101,7 +4115,7 @@ static void show_help(void)
     av_opt_show2(sws_opts, NULL, AV_OPT_FLAG_ENCODING_PARAM|AV_OPT_FLAG_DECODING_PARAM, 0);
 }
 
-static void opt_target(const char *arg)
+static int opt_target(const char *opt, const char *arg)
 {
     enum { PAL, NTSC, FILM, UNKNOWN } norm = UNKNOWN;
     static const char *const frame_rates[] = {"25", "30000/1001", "24000/1001"};
@@ -4158,13 +4172,12 @@ static void opt_target(const char *arg)
     }
 
     if(!strcmp(arg, "vcd")) {
+        opt_video_codec("vcodec", "mpeg1video");
+        opt_audio_codec("vcodec", "mp2");
+        opt_format("f", "vcd");
 
-        opt_video_codec("mpeg1video");
-        opt_audio_codec("mp2");
-        opt_format("vcd");
-
-        opt_frame_size(norm == PAL ? "352x288" : "352x240");
-        opt_frame_rate(NULL, frame_rates[norm]);
+        opt_frame_size("s", norm == PAL ? "352x288" : "352x240");
+        opt_frame_rate("r", frame_rates[norm]);
         opt_default("g", norm == PAL ? "15" : "18");
 
         opt_default("b", "1150000");
@@ -4187,12 +4200,12 @@ static void opt_target(const char *arg)
         mux_preload= (36000+3*1200) / 90000.0; //0.44
     } else if(!strcmp(arg, "svcd")) {
 
-        opt_video_codec("mpeg2video");
-        opt_audio_codec("mp2");
-        opt_format("svcd");
+        opt_video_codec("vcodec", "mpeg2video");
+        opt_audio_codec("acodec", "mp2");
+        opt_format("f", "svcd");
 
-        opt_frame_size(norm == PAL ? "480x576" : "480x480");
-        opt_frame_rate(NULL, frame_rates[norm]);
+        opt_frame_size("s", norm == PAL ? "480x576" : "480x480");
+        opt_frame_rate("r", frame_rates[norm]);
         opt_default("g", norm == PAL ? "15" : "18");
 
         opt_default("b", "2040000");
@@ -4209,12 +4222,12 @@ static void opt_target(const char *arg)
 
     } else if(!strcmp(arg, "dvd")) {
 
-        opt_video_codec("mpeg2video");
-        opt_audio_codec("ac3");
-        opt_format("dvd");
+        opt_video_codec("vcodec", "mpeg2video");
+        opt_audio_codec("vcodec", "ac3");
+        opt_format("f", "dvd");
 
-        opt_frame_size(norm == PAL ? "720x576" : "720x480");
-        opt_frame_rate(NULL, frame_rates[norm]);
+        opt_frame_size("vcodec", norm == PAL ? "720x576" : "720x480");
+        opt_frame_rate("r", frame_rates[norm]);
         opt_default("g", norm == PAL ? "15" : "18");
 
         opt_default("b", "6000000");
@@ -4230,29 +4243,31 @@ static void opt_target(const char *arg)
 
     } else if(!strncmp(arg, "dv", 2)) {
 
-        opt_format("dv");
+        opt_format("f", "dv");
 
-        opt_frame_size(norm == PAL ? "720x576" : "720x480");
-        opt_frame_pix_fmt(!strncmp(arg, "dv50", 4) ? "yuv422p" :
-                          (norm == PAL ? "yuv420p" : "yuv411p"));
-        opt_frame_rate(NULL, frame_rates[norm]);
+        opt_frame_size("s", norm == PAL ? "720x576" : "720x480");
+        opt_frame_pix_fmt("pix_fmt", !strncmp(arg, "dv50", 4) ? "yuv422p" :
+                          norm == PAL ? "yuv420p" : "yuv411p");
+        opt_frame_rate("r", frame_rates[norm]);
 
         audio_sample_rate = 48000;
         audio_channels = 2;
 
     } else {
         fprintf(stderr, "Unknown target: %s\n", arg);
-        ffmpeg_exit(1);
+        return AVERROR(EINVAL);
     }
+    return 0;
 }
 
-static void opt_vstats_file (const char *arg)
+static int opt_vstats_file(const char *opt, const char *arg)
 {
     av_free (vstats_filename);
     vstats_filename=av_strdup (arg);
+    return 0;
 }
 
-static void opt_vstats (void)
+static int opt_vstats(const char *opt, const char *arg)
 {
     char filename[40];
     time_t today2 = time(NULL);
@@ -4260,7 +4275,7 @@ static void opt_vstats (void)
 
     snprintf(filename, sizeof(filename), "vstats_%02d%02d%02d.log", today->tm_hour, today->tm_min,
              today->tm_sec);
-    opt_vstats_file(filename);
+    return opt_vstats_file(opt, filename);
 }
 
 static int opt_bsf(const char *opt, const char *arg)
@@ -4307,13 +4322,13 @@ static int opt_preset(const char *opt, const char *arg)
             ffmpeg_exit(1);
         }
         if(!strcmp(tmp, "acodec")){
-            opt_audio_codec(tmp2);
+            opt_audio_codec(tmp, tmp2);
         }else if(!strcmp(tmp, "vcodec")){
-            opt_video_codec(tmp2);
+            opt_video_codec(tmp, tmp2);
         }else if(!strcmp(tmp, "scodec")){
-            opt_subtitle_codec(tmp2);
+            opt_subtitle_codec(tmp, tmp2);
         }else if(!strcmp(tmp, "dcodec")){
-            opt_data_codec(tmp2);
+            opt_data_codec(tmp, tmp2);
         }else if(opt_default(tmp, tmp2) < 0){
             fprintf(stderr, "%s: Invalid option or argument: '%s', parsed as '%s' = '%s'\n", filename, line, tmp, tmp2);
             ffmpeg_exit(1);
@@ -4347,17 +4362,17 @@ static const OptionDef options[] = {
     { "map_metadata", HAS_ARG | OPT_EXPERT, {(void*)opt_map_metadata}, "set metadata information of outfile from infile",
       "outfile[,metadata]:infile[,metadata]" },
     { "map_chapters",  HAS_ARG | OPT_EXPERT, {(void*)opt_map_chapters},  "set chapters mapping", "outfile:infile" },
-    { "t", OPT_FUNC2 | HAS_ARG, {(void*)opt_recording_time}, "record or transcode \"duration\" seconds of audio/video", "duration" },
+    { "t", HAS_ARG, {(void*)opt_recording_time}, "record or transcode \"duration\" seconds of audio/video", "duration" },
     { "fs", HAS_ARG | OPT_INT64, {(void*)&limit_filesize}, "set the limit file size in bytes", "limit_size" }, //
-    { "ss", OPT_FUNC2 | HAS_ARG, {(void*)opt_start_time}, "set the start time offset", "time_off" },
-    { "itsoffset", OPT_FUNC2 | HAS_ARG, {(void*)opt_input_ts_offset}, "set the input ts offset", "time_off" },
+    { "ss", HAS_ARG, {(void*)opt_start_time}, "set the start time offset", "time_off" },
+    { "itsoffset", HAS_ARG, {(void*)opt_input_ts_offset}, "set the input ts offset", "time_off" },
     { "itsscale", HAS_ARG, {(void*)opt_input_ts_scale}, "set the input ts scale", "stream:scale" },
-    { "timestamp", OPT_FUNC2 | HAS_ARG, {(void*)opt_recording_timestamp}, "set the recording timestamp ('now' to set the current time)", "time" },
-    { "metadata", OPT_FUNC2 | HAS_ARG, {(void*)opt_metadata}, "add metadata", "string=string" },
+    { "timestamp", HAS_ARG, {(void*)opt_recording_timestamp}, "set the recording timestamp ('now' to set the current time)", "time" },
+    { "metadata", HAS_ARG, {(void*)opt_metadata}, "add metadata", "string=string" },
     { "dframes", OPT_INT | HAS_ARG, {(void*)&max_frames[AVMEDIA_TYPE_DATA]}, "set the number of data frames to record", "number" },
     { "benchmark", OPT_BOOL | OPT_EXPERT, {(void*)&do_benchmark},
       "add timings for benchmarking" },
-    { "timelimit", OPT_FUNC2 | HAS_ARG, {(void*)opt_timelimit}, "set max runtime in seconds", "limit" },
+    { "timelimit", HAS_ARG, {(void*)opt_timelimit}, "set max runtime in seconds", "limit" },
     { "dump", OPT_BOOL | OPT_EXPERT, {(void*)&do_pkt_dump},
       "dump each input packet" },
     { "hex", OPT_BOOL | OPT_EXPERT, {(void*)&do_hex_dump},
@@ -4365,9 +4380,9 @@ static const OptionDef options[] = {
     { "re", OPT_BOOL | OPT_EXPERT, {(void*)&rate_emu}, "read input at native frame rate", "" },
     { "loop_input", OPT_BOOL | OPT_EXPERT, {(void*)&loop_input}, "loop (current only works with images)" },
     { "loop_output", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&loop_output}, "number of times to loop output in formats that support looping (0 loops forever)", "" },
-    { "v", HAS_ARG | OPT_FUNC2, {(void*)opt_verbose}, "set ffmpeg verbosity level", "number" },
+    { "v", HAS_ARG, {(void*)opt_verbose}, "set ffmpeg verbosity level", "number" },
     { "target", HAS_ARG, {(void*)opt_target}, "specify target file type (\"vcd\", \"svcd\", \"dvd\", \"dv\", \"dv50\", \"pal-vcd\", \"ntsc-svcd\", ...)", "type" },
-    { "threads", OPT_FUNC2 | HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" },
+    { "threads",  HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" },
     { "vsync", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&video_sync_method}, "video sync method", "" },
     { "async", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&audio_sync_method}, "audio sync method", "" },
     { "adrift_threshold", HAS_ARG | OPT_FLOAT | OPT_EXPERT, {(void*)&audio_drift_threshold}, "audio drift threshold", "threshold" },
@@ -4380,33 +4395,33 @@ static const OptionDef options[] = {
     { "copyinkf", OPT_BOOL | OPT_EXPERT, {(void*)&copy_initial_nonkeyframes}, "copy initial non-keyframes" },
 
     /* video options */
-    { "b", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
-    { "vb", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
+    { "b",  HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
+    { "vb",  HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
     { "vframes", OPT_INT | HAS_ARG | OPT_VIDEO, {(void*)&max_frames[AVMEDIA_TYPE_VIDEO]}, "set the number of video frames to record", "number" },
-    { "r", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" },
+    { "r",  HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" },
     { "s", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_size}, "set frame size (WxH or abbreviation)", "size" },
     { "aspect", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_aspect_ratio}, "set aspect ratio (4:3, 16:9 or 1.3333, 1.7777)", "aspect" },
     { "pix_fmt", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_frame_pix_fmt}, "set pixel format, 'list' as argument shows all the pixel formats supported", "format" },
     { "bits_per_raw_sample", OPT_INT | HAS_ARG | OPT_VIDEO, {(void*)&frame_bits_per_raw_sample}, "set the number of bits per raw sample", "number" },
-    { "croptop", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
-    { "cropbottom", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
-    { "cropleft", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
-    { "cropright", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
-    { "padtop", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
-    { "padbottom", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
-    { "padleft", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
-    { "padright", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
-    { "padcolor", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "color" },
+    { "croptop",  HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
+    { "cropbottom", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
+    { "cropleft", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
+    { "cropright", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" },
+    { "padtop", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
+    { "padbottom", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
+    { "padleft", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
+    { "padright", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" },
+    { "padcolor", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "color" },
     { "intra", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&intra_only}, "use only intra frames"},
     { "vn", OPT_BOOL | OPT_VIDEO, {(void*)&video_disable}, "disable video" },
     { "vdt", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&video_discard}, "discard threshold", "n" },
-    { "qscale", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qscale}, "use fixed video quantizer scale (VBR)", "q" },
+    { "qscale", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qscale}, "use fixed video quantizer scale (VBR)", "q" },
     { "rc_override", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_video_rc_override_string}, "rate control override for specific intervals", "override" },
     { "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_video_codec}, "force video codec ('copy' to copy stream)", "codec" },
-    { "me_threshold", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO, {(void*)opt_me_threshold}, "motion estimaton threshold",  "threshold" },
+    { "me_threshold", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_me_threshold}, "motion estimaton threshold",  "threshold" },
     { "sameq", OPT_BOOL | OPT_VIDEO, {(void*)&same_quality},
       "use same quantizer as source (implies VBR)" },
-    { "pass", HAS_ARG | OPT_FUNC2 | OPT_VIDEO, {(void*)opt_pass}, "select the pass number (1 or 2)", "n" },
+    { "pass", HAS_ARG | OPT_VIDEO, {(void*)opt_pass}, "select the pass number (1 or 2)", "n" },
     { "passlogfile", HAS_ARG | OPT_VIDEO, {(void*)&opt_passlogfile}, "select two pass log file name prefix", "prefix" },
     { "deinterlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_deinterlace},
       "deinterlace pictures" },
@@ -4418,39 +4433,39 @@ static const OptionDef options[] = {
 #endif
     { "intra_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_intra_matrix}, "specify intra matrix coeffs", "matrix" },
     { "inter_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_inter_matrix}, "specify inter matrix coeffs", "matrix" },
-    { "top", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO, {(void*)opt_top_field_first}, "top=1/bottom=0/auto=-1 field first", "" },
+    { "top", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_top_field_first}, "top=1/bottom=0/auto=-1 field first", "" },
     { "dc", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&intra_dc_precision}, "intra_dc_precision", "precision" },
-    { "vtag", OPT_FUNC2 | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_codec_tag}, "force video tag/fourcc", "fourcc/tag" },
-    { "newvideo", OPT_VIDEO | OPT_FUNC2, {(void*)opt_new_stream}, "add a new video stream to the current output stream" },
+    { "vtag", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_codec_tag}, "force video tag/fourcc", "fourcc/tag" },
+    { "newvideo", OPT_VIDEO, {(void*)opt_new_stream}, "add a new video stream to the current output stream" },
     { "vlang", HAS_ARG | OPT_STRING | OPT_VIDEO, {(void *)&video_language}, "set the ISO 639 language code (3 letters) of the current video stream" , "code" },
     { "qphist", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, { (void *)&qp_hist }, "show QP histogram" },
     { "force_fps", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&force_fps}, "force the selected framerate, disable the best supported framerate selection" },
-    { "streamid", OPT_FUNC2 | HAS_ARG | OPT_EXPERT, {(void*)opt_streamid}, "set the value of an outfile streamid", "streamIndex:value" },
+    { "streamid", HAS_ARG | OPT_EXPERT, {(void*)opt_streamid}, "set the value of an outfile streamid", "streamIndex:value" },
     { "force_key_frames", OPT_STRING | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void *)&forced_key_frames}, "force key frames at specified timestamps", "timestamps" },
 
     /* audio options */
-    { "ab", OPT_FUNC2 | HAS_ARG | OPT_AUDIO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
+    { "ab", HAS_ARG | OPT_AUDIO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
     { "aframes", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&max_frames[AVMEDIA_TYPE_AUDIO]}, "set the number of audio frames to record", "number" },
     { "aq", OPT_FLOAT | HAS_ARG | OPT_AUDIO, {(void*)&audio_qscale}, "set audio quality (codec-specific)", "quality", },
-    { "ar", HAS_ARG | OPT_FUNC2 | OPT_AUDIO, {(void*)opt_audio_rate}, "set audio sampling rate (in Hz)", "rate" },
-    { "ac", HAS_ARG | OPT_FUNC2 | OPT_AUDIO, {(void*)opt_audio_channels}, "set number of audio channels", "channels" },
+    { "ar", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_rate}, "set audio sampling rate (in Hz)", "rate" },
+    { "ac", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_channels}, "set number of audio channels", "channels" },
     { "an", OPT_BOOL | OPT_AUDIO, {(void*)&audio_disable}, "disable audio" },
     { "acodec", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_codec}, "force audio codec ('copy' to copy stream)", "codec" },
-    { "atag", OPT_FUNC2 | HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_codec_tag}, "force audio tag/fourcc", "fourcc/tag" },
+    { "atag", HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_codec_tag}, "force audio tag/fourcc", "fourcc/tag" },
     { "vol", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&audio_volume}, "change audio volume (256=normal)" , "volume" }, //
-    { "newaudio", OPT_AUDIO | OPT_FUNC2, {(void*)opt_new_stream}, "add a new audio stream to the current output stream" },
+    { "newaudio", OPT_AUDIO, {(void*)opt_new_stream}, "add a new audio stream to the current output stream" },
     { "alang", HAS_ARG | OPT_STRING | OPT_AUDIO, {(void *)&audio_language}, "set the ISO 639 language code (3 letters) of the current audio stream" , "code" },
     { "sample_fmt", HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_audio_sample_fmt}, "set sample format, 'list' as argument shows all the sample formats supported", "format" },
 
     /* subtitle options */
     { "sn", OPT_BOOL | OPT_SUBTITLE, {(void*)&subtitle_disable}, "disable subtitle" },
     { "scodec", HAS_ARG | OPT_SUBTITLE, {(void*)opt_subtitle_codec}, "force subtitle codec ('copy' to copy stream)", "codec" },
-    { "newsubtitle", OPT_SUBTITLE | OPT_FUNC2, {(void*)opt_new_stream}, "add a new subtitle stream to the current output stream" },
+    { "newsubtitle", OPT_SUBTITLE, {(void*)opt_new_stream}, "add a new subtitle stream to the current output stream" },
     { "slang", HAS_ARG | OPT_STRING | OPT_SUBTITLE, {(void *)&subtitle_language}, "set the ISO 639 language code (3 letters) of the current subtitle stream" , "code" },
-    { "stag", OPT_FUNC2 | HAS_ARG | OPT_EXPERT | OPT_SUBTITLE, {(void*)opt_codec_tag}, "force subtitle tag/fourcc", "fourcc/tag" },
+    { "stag", HAS_ARG | OPT_EXPERT | OPT_SUBTITLE, {(void*)opt_codec_tag}, "force subtitle tag/fourcc", "fourcc/tag" },
 
     /* grab options */
-    { "vc", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_channel}, "set video grab channel (DV1394 only)", "channel" },
+    { "vc", HAS_ARG | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_channel}, "set video grab channel (DV1394 only)", "channel" },
     { "tvstd", HAS_ARG | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_standard}, "set television standard (NTSC, PAL (SECAM))", "standard" },
     { "isync", OPT_BOOL | OPT_EXPERT | OPT_GRAB, {(void*)&input_sync}, "sync read on input", "" },
 
@@ -4458,18 +4473,18 @@ static const OptionDef options[] = {
     { "muxdelay", OPT_FLOAT | HAS_ARG | OPT_EXPERT, {(void*)&mux_max_delay}, "set the maximum demux-decode delay", "seconds" },
     { "muxpreload", OPT_FLOAT | HAS_ARG | OPT_EXPERT, {(void*)&mux_preload}, "set the initial demux-decode delay", "seconds" },
 
-    { "absf", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
-    { "vbsf", OPT_FUNC2 | HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
-    { "sbsf", OPT_FUNC2 | HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
+    { "absf", HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
+    { "vbsf", HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
+    { "sbsf", HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" },
 
-    { "apre", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_preset}, "set the audio options to the indicated preset", "preset" },
-    { "vpre", OPT_FUNC2 | HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_preset}, "set the video options to the indicated preset", "preset" },
-    { "spre", OPT_FUNC2 | HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_preset}, "set the subtitle options to the indicated preset", "preset" },
-    { "fpre", OPT_FUNC2 | HAS_ARG | OPT_EXPERT, {(void*)opt_preset}, "set options from indicated preset file", "filename" },
+    { "apre", HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_preset}, "set the audio options to the indicated preset", "preset" },
+    { "vpre", HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_preset}, "set the video options to the indicated preset", "preset" },
+    { "spre", HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_preset}, "set the subtitle options to the indicated preset", "preset" },
+    { "fpre", HAS_ARG | OPT_EXPERT, {(void*)opt_preset}, "set options from indicated preset file", "filename" },
     /* data codec support */
     { "dcodec", HAS_ARG | OPT_DATA, {(void*)opt_data_codec}, "force data codec ('copy' to copy stream)", "codec" },
 
-    { "default", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
+    { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
     { NULL, },
 };
 
diff --git a/ffplay.c b/ffplay.c
index e2e097a403..9b9b665385 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -2817,16 +2817,17 @@ static void event_loop(void)
     }
 }
 
-static void opt_frame_size(const char *arg)
+static int opt_frame_size(const char *opt, const char *arg)
 {
     if (av_parse_video_size(&frame_width, &frame_height, arg) < 0) {
         fprintf(stderr, "Incorrect frame size\n");
-        exit(1);
+        return AVERROR(EINVAL);
     }
     if ((frame_width % 2) != 0 || (frame_height % 2) != 0) {
         fprintf(stderr, "Frame size must be a multiple of 2\n");
-        exit(1);
+        return AVERROR(EINVAL);
     }
+    return 0;
 }
 
 static int opt_width(const char *opt, const char *arg)
@@ -2841,18 +2842,20 @@ static int opt_height(const char *opt, const char *arg)
     return 0;
 }
 
-static void opt_format(const char *arg)
+static int opt_format(const char *opt, const char *arg)
 {
     file_iformat = av_find_input_format(arg);
     if (!file_iformat) {
         fprintf(stderr, "Unknown input format: %s\n", arg);
-        exit(1);
+        return AVERROR(EINVAL);
     }
+    return 0;
 }
 
-static void opt_frame_pix_fmt(const char *arg)
+static int opt_frame_pix_fmt(const char *opt, const char *arg)
 {
     frame_pix_fmt = av_get_pix_fmt(arg);
+    return 0;
 }
 
 static int opt_sync(const char *opt, const char *arg)
@@ -2915,8 +2918,8 @@ static int opt_show_mode(const char *opt, const char *arg)
 
 static const OptionDef options[] = {
 #include "cmdutils_common_opts.h"
-    { "x", HAS_ARG | OPT_FUNC2, {(void*)opt_width}, "force displayed width", "width" },
-    { "y", HAS_ARG | OPT_FUNC2, {(void*)opt_height}, "force displayed height", "height" },
+    { "x", HAS_ARG, {(void*)opt_width}, "force displayed width", "width" },
+    { "y", HAS_ARG, {(void*)opt_height}, "force displayed height", "height" },
     { "s", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_size}, "set frame size (WxH or abbreviation)", "size" },
     { "fs", OPT_BOOL, {(void*)&is_full_screen}, "force full screen" },
     { "an", OPT_BOOL, {(void*)&audio_disable}, "disable audio" },
@@ -2924,16 +2927,16 @@ static const OptionDef options[] = {
     { "ast", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&wanted_stream[AVMEDIA_TYPE_AUDIO]}, "select desired audio stream", "stream_number" },
     { "vst", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&wanted_stream[AVMEDIA_TYPE_VIDEO]}, "select desired video stream", "stream_number" },
     { "sst", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&wanted_stream[AVMEDIA_TYPE_SUBTITLE]}, "select desired subtitle stream", "stream_number" },
-    { "ss", HAS_ARG | OPT_FUNC2, {(void*)&opt_seek}, "seek to a given position in seconds", "pos" },
-    { "t", HAS_ARG | OPT_FUNC2, {(void*)&opt_duration}, "play  \"duration\" seconds of audio/video", "duration" },
+    { "ss", HAS_ARG, {(void*)&opt_seek}, "seek to a given position in seconds", "pos" },
+    { "t", HAS_ARG, {(void*)&opt_duration}, "play  \"duration\" seconds of audio/video", "duration" },
     { "bytes", OPT_INT | HAS_ARG, {(void*)&seek_by_bytes}, "seek by bytes 0=off 1=on -1=auto", "val" },
     { "nodisp", OPT_BOOL, {(void*)&display_disable}, "disable graphical display" },
     { "f", HAS_ARG, {(void*)opt_format}, "force format", "fmt" },
     { "pix_fmt", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_frame_pix_fmt}, "set pixel format", "format" },
     { "stats", OPT_BOOL | OPT_EXPERT, {(void*)&show_status}, "show status", "" },
-    { "debug", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_debug}, "print specific debug info", "" },
+    { "debug", HAS_ARG | OPT_EXPERT, {(void*)opt_debug}, "print specific debug info", "" },
     { "bug", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&workaround_bugs}, "workaround bugs", "" },
-    { "vismv", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_vismv}, "visualize motion vectors", "" },
+    { "vismv", HAS_ARG | OPT_EXPERT, {(void*)opt_vismv}, "visualize motion vectors", "" },
     { "fast", OPT_BOOL | OPT_EXPERT, {(void*)&fast}, "non spec compliant optimizations", "" },
     { "genpts", OPT_BOOL | OPT_EXPERT, {(void*)&genpts}, "generate pts", "" },
     { "drp", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&decoder_reorder_pts}, "let decoder reorder pts 0=off 1=on -1=auto", ""},
@@ -2944,8 +2947,8 @@ static const OptionDef options[] = {
     { "idct", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&idct}, "set idct algo",  "algo" },
     { "er", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&error_recognition}, "set error detection threshold (0-4)",  "threshold" },
     { "ec", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&error_concealment}, "set error concealment options",  "bit_mask" },
-    { "sync", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" },
-    { "threads", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" },
+    { "sync", HAS_ARG | OPT_EXPERT, {(void*)opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" },
+    { "threads", HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" },
     { "autoexit", OPT_BOOL | OPT_EXPERT, {(void*)&autoexit}, "exit at the end", "" },
     { "exitonkeydown", OPT_BOOL | OPT_EXPERT, {(void*)&exit_on_keydown}, "exit on key down", "" },
     { "exitonmousedown", OPT_BOOL | OPT_EXPERT, {(void*)&exit_on_mousedown}, "exit on mouse down", "" },
@@ -2956,8 +2959,8 @@ static const OptionDef options[] = {
     { "vf", OPT_STRING | HAS_ARG, {(void*)&vfilters}, "video filters", "filter list" },
 #endif
     { "rdftspeed", OPT_INT | HAS_ARG| OPT_AUDIO | OPT_EXPERT, {(void*)&rdftspeed}, "rdft speed", "msecs" },
-    { "showmode", HAS_ARG | OPT_FUNC2, {(void*)opt_show_mode}, "select show mode (0 = video, 1 = waves, 2 = RDFT)", "mode" },
-    { "default", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
+    { "showmode", HAS_ARG, {(void*)opt_show_mode}, "select show mode (0 = video, 1 = waves, 2 = RDFT)", "mode" },
+    { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
     { "i", OPT_DUMMY, {NULL}, "ffmpeg compatibility dummy option", ""},
     { NULL, },
 };
diff --git a/ffprobe.c b/ffprobe.c
index 696307d8e3..57e2a9bc06 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -329,13 +329,14 @@ static void show_usage(void)
     printf("\n");
 }
 
-static void opt_format(const char *arg)
+static int opt_format(const char *opt, const char *arg)
 {
     iformat = av_find_input_format(arg);
     if (!iformat) {
         fprintf(stderr, "Unknown input format: %s\n", arg);
-        exit(1);
+        return AVERROR(EINVAL);
     }
+    return 0;
 }
 
 static void opt_input_file(const char *arg)
@@ -382,7 +383,7 @@ static const OptionDef options[] = {
     { "show_format",  OPT_BOOL, {(void*)&do_show_format} , "show format/container info" },
     { "show_packets", OPT_BOOL, {(void*)&do_show_packets}, "show packets info" },
     { "show_streams", OPT_BOOL, {(void*)&do_show_streams}, "show streams info" },
-    { "default", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
+    { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
     { NULL, },
 };
 

From 34e9c9b11192271a4e5d0557441d09686852c771 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 28 May 2011 10:50:11 +0200
Subject: [PATCH 419/830] iff: remove duplicated file description

---
 libavformat/iff.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavformat/iff.c b/libavformat/iff.c
index f6edcdda2e..2dd1ef7553 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -1,5 +1,4 @@
 /*
- * IFF (.iff) file demuxer
  * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
  * Copyright (c) 2010 Peter Ross <pross@xvid.org>
  * Copyright (c) 2010 Sebastian Vater <cdgs.basty@googlemail.com>

From e71f26086ab899be7df1efb30b33e0a11973fd8e Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 26 May 2011 19:19:18 +0200
Subject: [PATCH 420/830] lavu: add av_get_pix_fmt_name() convenience function

Also deprecate avcodec_get_pix_fmt_name() in its favor.
---
 doc/APIchanges          | 4 ++++
 libavcodec/avcodec.h    | 4 ++++
 libavcodec/imgconvert.c | 7 +++----
 libavcodec/version.h    | 5 ++++-
 libavutil/avutil.h      | 4 ++--
 libavutil/pixdesc.c     | 6 ++++++
 libavutil/pixdesc.h     | 9 +++++++++
 7 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index fe77697d93..d853cfecbd 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-05-28 - xxxxxx - lavu 51.3.0 - pixdesc.h
+  Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
+  avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
+
 2011-05-22 - xxxxxx - lavf 53.2.0 - avformat.h
   Introduce avformat_alloc_output_context2() and deprecate
   avformat_alloc_output_context().
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index fdc86bb7c0..ef539a2a36 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3391,12 +3391,16 @@ int avpicture_layout(const AVPicture* src, enum PixelFormat pix_fmt, int width,
 int avpicture_get_size(enum PixelFormat pix_fmt, int width, int height);
 void avcodec_get_chroma_sub_sample(enum PixelFormat pix_fmt, int *h_shift, int *v_shift);
 
+#if FF_API_GET_PIX_FMT_NAME
 /**
  * Return the short name for a pixel format.
  *
  * \see av_get_pix_fmt(), av_get_pix_fmt_string().
+ * @deprecated Deprecated in favor of av_get_pix_fmt_name().
  */
+attribute_deprecated
 const char *avcodec_get_pix_fmt_name(enum PixelFormat pix_fmt);
+#endif
 
 void avcodec_set_dimensions(AVCodecContext *s, int width, int height);
 
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index a86d2bd027..9aa584fa5c 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -417,13 +417,12 @@ void avcodec_get_chroma_sub_sample(enum PixelFormat pix_fmt, int *h_shift, int *
     *v_shift = av_pix_fmt_descriptors[pix_fmt].log2_chroma_h;
 }
 
+#if FF_API_GET_PIX_FMT_NAME
 const char *avcodec_get_pix_fmt_name(enum PixelFormat pix_fmt)
 {
-    if ((unsigned)pix_fmt >= PIX_FMT_NB)
-        return NULL;
-    else
-        return av_pix_fmt_descriptors[pix_fmt].name;
+    return av_get_pix_fmt_name(pix_fmt);
 }
+#endif
 
 int ff_is_hwaccel_pix_fmt(enum PixelFormat pix_fmt)
 {
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 471e3aaa9a..5a2e0cce2f 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -22,7 +22,7 @@
 
 #define LIBAVCODEC_VERSION_MAJOR 53
 #define LIBAVCODEC_VERSION_MINOR  6
-#define LIBAVCODEC_VERSION_MICRO  0
+#define LIBAVCODEC_VERSION_MICRO  1
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
@@ -65,5 +65,8 @@
 #ifndef FF_API_FLAC_GLOBAL_OPTS
 #define FF_API_FLAC_GLOBAL_OPTS (LIBAVCODEC_VERSION_MAJOR < 54)
 #endif
+#ifndef FF_API_GET_PIX_FMT_NAME
+#define FF_API_GET_PIX_FMT_NAME (LIBAVCODEC_VERSION_MAJOR < 54)
+#endif
 
 #endif /* AVCODEC_VERSION_H */
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index ca3e029459..e3bc6c72fa 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,8 +40,8 @@
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR  2
-#define LIBAVUTIL_VERSION_MICRO  2
+#define LIBAVUTIL_VERSION_MINOR  3
+#define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index f8f99289f9..57a3860e71 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -906,6 +906,12 @@ static enum PixelFormat get_pix_fmt_internal(const char *name)
     return PIX_FMT_NONE;
 }
 
+const char *av_get_pix_fmt_name(enum PixelFormat pix_fmt)
+{
+    return (unsigned)pix_fmt < PIX_FMT_NB ?
+        av_pix_fmt_descriptors[pix_fmt].name : NULL;
+}
+
 #if HAVE_BIGENDIAN
 #   define X_NE(be, le) be
 #else
diff --git a/libavutil/pixdesc.h b/libavutil/pixdesc.h
index 727e47f06a..fad6d59d87 100644
--- a/libavutil/pixdesc.h
+++ b/libavutil/pixdesc.h
@@ -22,6 +22,7 @@
 #ifndef AVUTIL_PIXDESC_H
 #define AVUTIL_PIXDESC_H
 
+#include "pixfmt.h"
 #include <inttypes.h>
 
 typedef struct AVComponentDescriptor{
@@ -141,6 +142,14 @@ void av_write_image_line(const uint16_t *src, uint8_t *data[4], const int linesi
  */
 enum PixelFormat av_get_pix_fmt(const char *name);
 
+/**
+ * Return the short name for a pixel format, NULL in case pix_fmt is
+ * unknown.
+ *
+ * @see av_get_pix_fmt(), av_get_pix_fmt_string()
+ */
+const char *av_get_pix_fmt_name(enum PixelFormat pix_fmt);
+
 /**
  * Print in buf the string corresponding to the pixel format with
  * number pix_fmt, or an header if pix_fmt is negative.

From 7743865ffcb921a78a913b3de5a6d80248954d71 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Thu, 26 May 2011 15:57:21 -0400
Subject: [PATCH 421/830] ac3enc: store per-block/channel bap pointers by
 reference block in a 2D array rather than in the AC3Block struct.

This will make it easier to access the bap values without having to chase
the reference block pointers each time.
---
 libavcodec/ac3enc.c | 52 ++++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index c85c69d248..1faafb61d6 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -107,7 +107,6 @@ typedef struct AC3EncOptions {
  * Data for a single audio block.
  */
 typedef struct AC3Block {
-    uint8_t  **bap;                             ///< bit allocation pointers (bap)
     CoefType **mdct_coef;                       ///< MDCT coefficients
     int32_t  **fixed_coef;                      ///< fixed-point MDCT coefficients
     uint8_t  **exp;                             ///< original exponents
@@ -122,7 +121,6 @@ typedef struct AC3Block {
     uint8_t  new_rematrixing_strategy;          ///< send new rematrixing flags in this block
     int      num_rematrixing_bands;             ///< number of rematrixing bands
     uint8_t  rematrixing_flags[4];              ///< rematrixing flags
-    struct AC3Block *exp_ref_block[AC3_MAX_CHANNELS]; ///< reference blocks for EXP_REUSE
     int      new_cpl_strategy;                  ///< send new coupling strategy
     int      cpl_in_use;                        ///< coupling in use for this block     (cplinu)
     uint8_t  channel_in_cpl[AC3_MAX_CHANNELS];  ///< channel in coupling                (chincpl)
@@ -219,6 +217,9 @@ typedef struct AC3EncodeContext {
     uint8_t *cpl_coord_mant_buffer;
 
     uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies
+    uint8_t exp_ref_block[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< reference blocks for EXP_REUSE
+    uint8_t *ref_bap     [AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< bit allocation pointers (bap)
+    int ref_bap_set;                                         ///< indicates if ref_bap pointers have been set
 
     DECLARE_ALIGNED(32, SampleType, windowed_samples)[AC3_WINDOW_SIZE];
 } AC3EncodeContext;
@@ -1073,10 +1074,10 @@ static void encode_exponents(AC3EncodeContext *s)
             blk1 = blk + 1;
 
             /* count the number of EXP_REUSE blocks after the current block
-               and set exponent reference block pointers */
-            block->exp_ref_block[ch] = block;
+               and set exponent reference block numbers */
+            s->exp_ref_block[ch][blk] = blk;
             while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) {
-                s->blocks[blk1].exp_ref_block[ch] = block;
+                s->exp_ref_block[ch][blk1] = blk;
                 blk1++;
             }
             num_reuse_blocks = blk1 - blk - 1;
@@ -1091,6 +1092,9 @@ static void encode_exponents(AC3EncodeContext *s)
             blk = blk1;
         }
     }
+
+    /* reference block numbers have been changed, so reset ref_bap_set */
+    s->ref_bap_set = 0;
 }
 
 
@@ -1472,14 +1476,18 @@ static void bit_alloc_masking(AC3EncodeContext *s)
 static void reset_block_bap(AC3EncodeContext *s)
 {
     int blk, ch;
-    int channels = s->channels + 1;
-    if (s->blocks[0].bap[0] == s->bap_buffer)
+    uint8_t *ref_bap;
+
+    if (s->ref_bap[0][0] == s->bap_buffer && s->ref_bap_set)
         return;
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-        for (ch = 0; ch < channels; ch++) {
-            s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * channels + ch)];
-        }
+
+    ref_bap = s->bap_buffer;
+    for (ch = 0; ch <= s->channels; ch++) {
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            s->ref_bap[ch][blk] = ref_bap + AC3_MAX_COEFS * s->exp_ref_block[ch][blk];
+        ref_bap += AC3_MAX_COEFS * AC3_MAX_BLOCKS;
     }
+    s->ref_bap_set = 1;
 }
 
 
@@ -1502,7 +1510,6 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset)
     mantissa_bits = 0;
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        AC3Block *ref_block;
         int av_uninit(ch0);
         int got_cpl = !block->cpl_in_use;
         // initialize grouped mantissa counts. these are set so that they are
@@ -1522,15 +1529,14 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset)
                blocks within a frame are the exponent values.  We can take
                advantage of that by reusing the bit allocation pointers
                whenever we reuse exponents. */
-            ref_block = block->exp_ref_block[ch];
             if (s->exp_strategy[ch][blk] != EXP_REUSE) {
-                s->ac3dsp.bit_alloc_calc_bap(ref_block->mask[ch], ref_block->psd[ch],
+                s->ac3dsp.bit_alloc_calc_bap(block->mask[ch], block->psd[ch],
                                              s->start_freq[ch], block->end_freq[ch],
                                              snr_offset, s->bit_alloc.floor,
-                                             ff_ac3_bap_tab, ref_block->bap[ch]);
+                                             ff_ac3_bap_tab, s->ref_bap[ch][blk]);
             }
             mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt,
-                                                             ref_block->bap[ch]+s->start_freq[ch],
+                                                             s->ref_bap[ch][blk]+s->start_freq[ch],
                                                              block->end_freq[ch]-s->start_freq[ch]);
             if (ch == CPL_CH)
                 ch = ch0;
@@ -1812,7 +1818,6 @@ static void quantize_mantissas(AC3EncodeContext *s)
 
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        AC3Block *ref_block;
         AC3Mant m = { 0 };
 
         got_cpl = !block->cpl_in_use;
@@ -1822,10 +1827,9 @@ static void quantize_mantissas(AC3EncodeContext *s)
                 ch      = CPL_CH;
                 got_cpl = 1;
             }
-            ref_block = block->exp_ref_block[ch];
             quantize_mantissas_blk_ch(&m, block->fixed_coef[ch],
-                                      ref_block->exp[ch],
-                                      ref_block->bap[ch], block->qmant[ch],
+                                      s->blocks[s->exp_ref_block[ch][blk]].exp[ch],
+                                      s->ref_bap[ch][blk], block->qmant[ch],
                                       s->start_freq[ch], block->end_freq[ch]);
             if (ch == CPL_CH)
                 ch = ch0;
@@ -2130,17 +2134,15 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     got_cpl = !block->cpl_in_use;
     for (ch = 1; ch <= s->channels; ch++) {
         int b, q;
-        AC3Block *ref_block;
 
         if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
             ch0     = ch - 1;
             ch      = CPL_CH;
             got_cpl = 1;
         }
-        ref_block = block->exp_ref_block[ch];
         for (i = s->start_freq[ch]; i < block->end_freq[ch]; i++) {
             q = block->qmant[ch][i];
-            b = ref_block->bap[ch][i];
+            b = s->ref_bap[ch][blk][i];
             switch (b) {
             case 0:                                         break;
             case 1: if (q != 128) put_bits(&s->pb,   5, q); break;
@@ -2597,7 +2599,6 @@ static av_cold int ac3_encode_close(AVCodecContext *avctx)
     av_freep(&s->qmant_buffer);
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        av_freep(&block->bap);
         av_freep(&block->mdct_coef);
         av_freep(&block->fixed_coef);
         av_freep(&block->exp);
@@ -2896,8 +2897,6 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
     }
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        FF_ALLOC_OR_GOTO(avctx, block->bap, channels * sizeof(*block->bap),
-                         alloc_fail);
         FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, channels * sizeof(*block->mdct_coef),
                           alloc_fail);
         FF_ALLOCZ_OR_GOTO(avctx, block->exp, channels * sizeof(*block->exp),
@@ -2921,7 +2920,6 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
 
         for (ch = 0; ch < channels; ch++) {
             /* arrangement: block, channel, coeff */
-            block->bap[ch]         = &s->bap_buffer        [AC3_MAX_COEFS * (blk * channels + ch)];
             block->grouped_exp[ch] = &s->grouped_exp_buffer[128           * (blk * channels + ch)];
             block->psd[ch]         = &s->psd_buffer        [AC3_MAX_COEFS * (blk * channels + ch)];
             block->band_psd[ch]    = &s->band_psd_buffer   [64            * (blk * channels + ch)];

From 1323828a0fbfa428d2e39a9f094039637b7fef5b Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Thu, 26 May 2011 12:31:31 -0400
Subject: [PATCH 422/830] ac3enc: split mantissa bit counting into a separate
 function.

No speed difference. This is to allow for more flexible bit counting.
---
 libavcodec/ac3enc.c | 49 ++++++++++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 1faafb61d6..6b9bd87853 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -1491,22 +1491,12 @@ static void reset_block_bap(AC3EncodeContext *s)
 }
 
 
-/**
- * Run the bit allocation with a given SNR offset.
- * This calculates the bit allocation pointers that will be used to determine
- * the quantization of each mantissa.
- * @return the number of bits needed for mantissas if the given SNR offset is
- *         is used.
- */
-static int bit_alloc(AC3EncodeContext *s, int snr_offset)
+static int count_mantissa_bits(AC3EncodeContext *s)
 {
     int blk, ch;
     int mantissa_bits;
     int mant_cnt[5];
 
-    snr_offset = (snr_offset - 240) << 2;
-
-    reset_block_bap(s);
     mantissa_bits = 0;
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
@@ -1524,7 +1514,36 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset)
                 ch      = CPL_CH;
                 got_cpl = 1;
             }
+            mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt,
+                                                             s->ref_bap[ch][blk]+s->start_freq[ch],
+                                                             block->end_freq[ch]-s->start_freq[ch]);
+            if (ch == CPL_CH)
+                ch = ch0;
+        }
+        mantissa_bits += compute_mantissa_size_final(mant_cnt);
+    }
+    return mantissa_bits;
+}
 
+
+/**
+ * Run the bit allocation with a given SNR offset.
+ * This calculates the bit allocation pointers that will be used to determine
+ * the quantization of each mantissa.
+ * @return the number of bits needed for mantissas if the given SNR offset is
+ *         used.
+ */
+static int bit_alloc(AC3EncodeContext *s, int snr_offset)
+{
+    int blk, ch;
+
+    snr_offset = (snr_offset - 240) << 2;
+
+    reset_block_bap(s);
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+
+        for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
             /* Currently the only bit allocation parameters which vary across
                blocks within a frame are the exponent values.  We can take
                advantage of that by reusing the bit allocation pointers
@@ -1535,15 +1554,9 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset)
                                              snr_offset, s->bit_alloc.floor,
                                              ff_ac3_bap_tab, s->ref_bap[ch][blk]);
             }
-            mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt,
-                                                             s->ref_bap[ch][blk]+s->start_freq[ch],
-                                                             block->end_freq[ch]-s->start_freq[ch]);
-            if (ch == CPL_CH)
-                ch = ch0;
         }
-        mantissa_bits += compute_mantissa_size_final(mant_cnt);
     }
-    return mantissa_bits;
+    return count_mantissa_bits(s);
 }
 
 

From 6ca23db9cccac05bef9bf9c665821b396af12a0b Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Thu, 26 May 2011 15:53:25 -0400
Subject: [PATCH 423/830] ac3enc: modify mantissa bit counting to keep bap
 counts for all values of bap instead of just 0 to 4.

This does all the actual bit counting as a final step.
---
 libavcodec/ac3dsp.c              | 42 +++++++++------
 libavcodec/ac3dsp.h              | 22 +++++++-
 libavcodec/ac3enc.c              | 92 +++++++++++++++++---------------
 libavcodec/arm/ac3dsp_arm.S      | 52 ------------------
 libavcodec/arm/ac3dsp_init_arm.c |  2 -
 libavcodec/x86/ac3dsp.asm        | 53 ++++++++++++++++++
 libavcodec/x86/ac3dsp_mmx.c      |  3 ++
 7 files changed, 151 insertions(+), 115 deletions(-)
 delete mode 100644 libavcodec/arm/ac3dsp_arm.S

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index e3ca37ebdd..de58f3ab26 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -128,24 +128,33 @@ static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
     } while (end > ff_ac3_band_start_tab[band++]);
 }
 
-static int ac3_compute_mantissa_size_c(int mant_cnt[5], uint8_t *bap,
-                                       int nb_coefs)
+static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap,
+                                    int len)
 {
-    int bits, b, i;
+    while (len-- >= 0)
+        mant_cnt[bap[len]]++;
+}
 
-    bits = 0;
-    for (i = 0; i < nb_coefs; i++) {
-        b = bap[i];
-        if (b <= 4) {
-            // bap=1 to bap=4 will be counted in compute_mantissa_size_final
-            mant_cnt[b]++;
-        } else if (b <= 13) {
-            // bap=5 to bap=13 use (bap-1) bits
-            bits += b - 1;
-        } else {
-            // bap=14 uses 14 bits and bap=15 uses 16 bits
-            bits += (b == 14) ? 14 : 16;
-        }
+DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
+    0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
+};
+
+static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16])
+{
+    int blk, bap;
+    int bits = 0;
+
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        // bap=1 : 3 mantissas in 5 bits
+        bits += (mant_cnt[blk][1] / 3) * 5;
+        // bap=2 : 3 mantissas in 7 bits
+        // bap=4 : 2 mantissas in 7 bits
+        bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7;
+        // bap=3 : 1 mantissa in 3 bits
+        bits += mant_cnt[blk][3] * 3;
+        // bap=5 to 15 : get bits per mantissa from table
+        for (bap = 5; bap < 16; bap++)
+            bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap];
     }
     return bits;
 }
@@ -181,6 +190,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
     c->ac3_rshift_int32 = ac3_rshift_int32_c;
     c->float_to_fixed24 = float_to_fixed24_c;
     c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c;
+    c->update_bap_counts = ac3_update_bap_counts_c;
     c->compute_mantissa_size = ac3_compute_mantissa_size_c;
     c->extract_exponents = ac3_extract_exponents_c;
 
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index b750767e81..8eeafd68ac 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -24,6 +24,12 @@
 
 #include <stdint.h>
 
+/**
+ * Number of mantissa bits written for each bap value.
+ * bap values with fractional bits are set to 0 and are calculated separately.
+ */
+extern const uint16_t ff_ac3_bap_bits[16];
+
 typedef struct AC3DSPContext {
     /**
      * Set each encoded exponent in a block to the minimum of itself and the
@@ -102,9 +108,21 @@ typedef struct AC3DSPContext {
                                const uint8_t *bap_tab, uint8_t *bap);
 
     /**
-     * Calculate the number of bits needed to encode a set of mantissas.
+     * Update bap counts using the supplied array of bap.
+     *
+     * @param[out] mant_cnt   bap counts for 1 block
+     * @param[in]  bap        array of bap, pointing to start coef bin
+     * @param[in]  len        number of elements to process
      */
-    int (*compute_mantissa_size)(int mant_cnt[5], uint8_t *bap, int nb_coefs);
+    void (*update_bap_counts)(uint16_t mant_cnt[16], uint8_t *bap, int len);
+
+    /**
+     * Calculate the number of bits needed to encode a set of mantissas.
+     *
+     * @param[in] mant_cnt    bap counts for all blocks
+     * @return                mantissa bit count
+     */
+    int (*compute_mantissa_size)(uint16_t mant_cnt[6][16]);
 
     void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs);
 } AC3DSPContext;
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 6b9bd87853..66dfc29217 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -1423,22 +1423,6 @@ static void count_frame_bits(AC3EncodeContext *s)
 }
 
 
-/**
- * Finalize the mantissa bit count by adding in the grouped mantissas.
- */
-static int compute_mantissa_size_final(int mant_cnt[5])
-{
-    // bap=1 : 3 mantissas in 5 bits
-    int bits = (mant_cnt[1] / 3) * 5;
-    // bap=2 : 3 mantissas in 7 bits
-    // bap=4 : 2 mantissas in 7 bits
-    bits += ((mant_cnt[2] / 3) + (mant_cnt[4] >> 1)) * 7;
-    // bap=3 : each mantissa is 3 bits
-    bits += mant_cnt[3] * 3;
-    return bits;
-}
-
-
 /**
  * Calculate masking curve based on the final exponents.
  * Also calculate the power spectral densities to use in future calculations.
@@ -1491,38 +1475,60 @@ static void reset_block_bap(AC3EncodeContext *s)
 }
 
 
-static int count_mantissa_bits(AC3EncodeContext *s)
+/**
+ * Initialize mantissa counts.
+ * These are set so that they are padded to the next whole group size when bits
+ * are counted in compute_mantissa_size.
+ */
+static void count_mantissa_bits_init(uint16_t mant_cnt[AC3_MAX_BLOCKS][16])
 {
-    int blk, ch;
-    int mantissa_bits;
-    int mant_cnt[5];
+    int blk;
+
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        memset(mant_cnt[blk], 0, sizeof(mant_cnt[blk]));
+        mant_cnt[blk][1] = mant_cnt[blk][2] = 2;
+        mant_cnt[blk][4] = 1;
+    }
+}
+
+
+/**
+ * Update mantissa bit counts for all blocks in 1 channel in a given bandwidth
+ * range.
+ */
+static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch,
+                                          uint16_t mant_cnt[AC3_MAX_BLOCKS][16],
+                                          int start, int end)
+{
+    int blk;
 
-    mantissa_bits = 0;
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        int av_uninit(ch0);
-        int got_cpl = !block->cpl_in_use;
-        // initialize grouped mantissa counts. these are set so that they are
-        // padded to the next whole group size when bits are counted in
-        // compute_mantissa_size_final
-        mant_cnt[0] = mant_cnt[3] = 0;
-        mant_cnt[1] = mant_cnt[2] = 2;
-        mant_cnt[4] = 1;
-        for (ch = 1; ch <= s->channels; ch++) {
-            if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
-                ch0     = ch - 1;
-                ch      = CPL_CH;
-                got_cpl = 1;
-            }
-            mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt,
-                                                             s->ref_bap[ch][blk]+s->start_freq[ch],
-                                                             block->end_freq[ch]-s->start_freq[ch]);
-            if (ch == CPL_CH)
-                ch = ch0;
-        }
-        mantissa_bits += compute_mantissa_size_final(mant_cnt);
+        if (ch == CPL_CH && !block->cpl_in_use)
+            continue;
+        s->ac3dsp.update_bap_counts(mant_cnt[blk],
+                                    s->ref_bap[ch][blk] + start,
+                                    FFMIN(end, block->end_freq[ch]) - start);
     }
-    return mantissa_bits;
+}
+
+
+/**
+ * Count the number of mantissa bits in the frame based on the bap values.
+ */
+static int count_mantissa_bits(AC3EncodeContext *s)
+{
+    int ch, max_end_freq;
+    LOCAL_ALIGNED_16(uint16_t, mant_cnt,[AC3_MAX_BLOCKS][16]);
+
+    count_mantissa_bits_init(mant_cnt);
+
+    max_end_freq = s->bandwidth_code * 3 + 73;
+    for (ch = !s->cpl_enabled; ch <= s->channels; ch++)
+        count_mantissa_bits_update_ch(s, ch, mant_cnt, s->start_freq[ch],
+                                      max_end_freq);
+
+    return s->ac3dsp.compute_mantissa_size(mant_cnt);
 }
 
 
diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S
deleted file mode 100644
index d7d498e41f..0000000000
--- a/libavcodec/arm/ac3dsp_arm.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "asm.S"
-
-function ff_ac3_compute_mantissa_size_arm, export=1
-        push            {r4-r8,lr}
-        ldm             r0,  {r4-r8}
-        mov             r3,  r0
-        mov             r0,  #0
-1:
-        ldrb            lr,  [r1], #1
-        subs            r2,  r2,  #1
-        blt             2f
-        cmp             lr,  #4
-        bgt             3f
-        subs            lr,  lr,  #1
-        addlt           r4,  r4,  #1
-        addeq           r5,  r5,  #1
-        ble             1b
-        subs            lr,  lr,  #2
-        addlt           r6,  r6,  #1
-        addeq           r7,  r7,  #1
-        addgt           r8,  r8,  #1
-        b               1b
-3:
-        cmp             lr,  #14
-        sublt           lr,  lr,  #1
-        addgt           r0,  r0,  #16
-        addle           r0,  r0,  lr
-        b               1b
-2:
-        stm             r3,  {r4-r8}
-        pop             {r4-r8,pc}
-endfunc
diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c
index fd78e1e6a4..4414dc8170 100644
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@@ -39,8 +39,6 @@ int ff_ac3_compute_mantissa_size_arm(int cnt[5], uint8_t *bap, int nb_coefs);
 
 av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
 {
-    c->compute_mantissa_size     = ff_ac3_compute_mantissa_size_arm;
-
     if (HAVE_ARMV6) {
         c->bit_alloc_calc_bap    = ff_ac3_bit_alloc_calc_bap_armv6;
     }
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 18f9dc3894..0d8f4b78eb 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -27,6 +27,11 @@ SECTION_RODATA
 ; 16777216.0f - used in ff_float_to_fixed24()
 pf_1_24: times 4 dd 0x4B800000
 
+; used in ff_ac3_compute_mantissa_size()
+cextern ac3_bap_bits
+pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
+pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -293,3 +298,51 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
 %endif
     ja .loop
     REP_RET
+
+;------------------------------------------------------------------------------
+; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
+;------------------------------------------------------------------------------
+
+%macro PHADDD4 2 ; xmm src, xmm tmp
+    movhlps  %2, %1
+    paddd    %1, %2
+    pshufd   %2, %1, 0x1
+    paddd    %1, %2
+%endmacro
+
+INIT_XMM
+cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
+    movdqa      m0, [mant_cntq      ]
+    movdqa      m1, [mant_cntq+ 1*16]
+    paddw       m0, [mant_cntq+ 2*16]
+    paddw       m1, [mant_cntq+ 3*16]
+    paddw       m0, [mant_cntq+ 4*16]
+    paddw       m1, [mant_cntq+ 5*16]
+    paddw       m0, [mant_cntq+ 6*16]
+    paddw       m1, [mant_cntq+ 7*16]
+    paddw       m0, [mant_cntq+ 8*16]
+    paddw       m1, [mant_cntq+ 9*16]
+    paddw       m0, [mant_cntq+10*16]
+    paddw       m1, [mant_cntq+11*16]
+    pmaddwd     m0, [ff_ac3_bap_bits   ]
+    pmaddwd     m1, [ff_ac3_bap_bits+16]
+    paddd       m0, m1
+    PHADDD4     m0, m1
+    movd      sumd, m0
+    movdqa      m3, [pw_bap_mul1]
+    movhpd      m0, [mant_cntq     +2]
+    movlpd      m0, [mant_cntq+1*32+2]
+    movhpd      m1, [mant_cntq+2*32+2]
+    movlpd      m1, [mant_cntq+3*32+2]
+    movhpd      m2, [mant_cntq+4*32+2]
+    movlpd      m2, [mant_cntq+5*32+2]
+    pmulhuw     m0, m3
+    pmulhuw     m1, m3
+    pmulhuw     m2, m3
+    paddusw     m0, m1
+    paddusw     m0, m2
+    pmaddwd     m0, [pw_bap_mul2]
+    PHADDD4     m0, m1
+    movd       eax, m0
+    add        eax, sumd
+    RET
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index 475042395c..2664736bb6 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -42,6 +42,8 @@ extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned i
 extern void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int len);
 extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
 
+extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
+
 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
 {
     int mm_flags = av_get_cpu_flags();
@@ -69,6 +71,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
         c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
         c->float_to_fixed24 = ff_float_to_fixed24_sse2;
+        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
         if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
             c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
             c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;

From b2a6f25c67cca644257f008c8de83e4939f87196 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 27 May 2011 01:34:35 +0200
Subject: [PATCH 424/830] lavdev: add SDL output device

---
 Changelog                |   1 +
 configure                |   2 +
 doc/outdevs.texi         |  43 ++++++++
 libavdevice/Makefile     |   1 +
 libavdevice/alldevices.c |   1 +
 libavdevice/avdevice.h   |   2 +-
 libavdevice/sdl.c        | 228 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 277 insertions(+), 1 deletion(-)
 create mode 100644 libavdevice/sdl.c

diff --git a/Changelog b/Changelog
index c5bec6a53b..37ccd06106 100644
--- a/Changelog
+++ b/Changelog
@@ -19,6 +19,7 @@ version <next>:
 - 9bit and 10bit FFV1 encoding / decoding
 - split filter added
 - select filter added
+- sdl output device added
 
 
 version 0.7_beta1:
diff --git a/configure b/configure
index 5f80ec5063..2c4f2e81e1 100755
--- a/configure
+++ b/configure
@@ -1471,6 +1471,7 @@ jack_indev_deps="jack_jack_h sem_timedwait"
 libdc1394_indev_deps="libdc1394"
 oss_indev_deps_any="soundcard_h sys_soundcard_h"
 oss_outdev_deps_any="soundcard_h sys_soundcard_h"
+sdl_outdev_deps="sdl"
 sndio_indev_deps="sndio_h"
 sndio_outdev_deps="sndio_h"
 v4l_indev_deps="linux_videodev_h"
@@ -2959,6 +2960,7 @@ else
     check_struct SDL.h SDL_VideoInfo current_w $sdl_cflags && enable sdl_video_size
   fi
 fi
+enabled sdl && add_cflags $sdl_cflags && add_extralibs $sdl_libs
 
 texi2html -version > /dev/null 2>&1 && enable texi2html || disable texi2html
 
diff --git a/doc/outdevs.texi b/doc/outdevs.texi
index fbb312363c..9985900afc 100644
--- a/doc/outdevs.texi
+++ b/doc/outdevs.texi
@@ -26,6 +26,49 @@ ALSA (Advanced Linux Sound Architecture) output device.
 
 OSS (Open Sound System) output device.
 
+@section sdl
+
+SDL (Simple Directmedia Layer) output device.
+
+This output device allows one to show a video stream in an SDL
+window. Only one SDL window is allowed per application, so you can
+have only one instance of this output device in an application.
+
+To enable this output device you need libsdl installed on your system
+when configuring your build.
+
+For more information about SDL, check:
+@url{http://www.libsdl.org/}
+
+@subsection Options
+
+@table @option
+
+@item window_title
+Set the SDL window title. If not specified, it defaults to "SDL video
+outdev".
+
+@item icon_title
+Set the name of the iconified SDL window. If not specified, it is set
+to the same value as @var{window_title}.
+
+@item window_size
+Set the SDL window size. It can be a string of the form
+@var{width}x@var{height} or a video size abbreviation.
+If not specified, it defaults to the size of the input video.
+@end table
+
+@subsection Examples
+
+The following command shows the @file{ffmpeg} output in an
+SDL window, forcing its size to the qcif format:
+@example
+ffmpeg -i INPUT -vcodec rawvideo -pix_fmt yuv420p -window_size qcif -f sdl none
+@end example
+
+Note that the name specified for the output device is ignored, so it
+can be set to an arbitrary value ("none" in the above example).
+
 @section sndio
 
 sndio audio output device.
diff --git a/libavdevice/Makefile b/libavdevice/Makefile
index 4bcb5a3ae6..60103a4864 100644
--- a/libavdevice/Makefile
+++ b/libavdevice/Makefile
@@ -21,6 +21,7 @@ OBJS-$(CONFIG_FBDEV_INDEV)               += fbdev.o
 OBJS-$(CONFIG_JACK_INDEV)                += jack_audio.o
 OBJS-$(CONFIG_OSS_INDEV)                 += oss_audio.o
 OBJS-$(CONFIG_OSS_OUTDEV)                += oss_audio.o
+OBJS-$(CONFIG_SDL_OUTDEV)                += sdl.o
 OBJS-$(CONFIG_SNDIO_INDEV)               += sndio_common.o sndio_dec.o
 OBJS-$(CONFIG_SNDIO_OUTDEV)              += sndio_common.o sndio_enc.o
 OBJS-$(CONFIG_V4L2_INDEV)                += v4l2.o
diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c
index 3021f08bda..7846704861 100644
--- a/libavdevice/alldevices.c
+++ b/libavdevice/alldevices.c
@@ -45,6 +45,7 @@ void avdevice_register_all(void)
     REGISTER_INDEV    (FBDEV, fbdev);
     REGISTER_INDEV    (JACK, jack);
     REGISTER_INOUTDEV (OSS, oss);
+    REGISTER_OUTDEV   (SDL, sdl);
     REGISTER_INOUTDEV (SNDIO, sndio);
     REGISTER_INDEV    (V4L2, v4l2);
 #if FF_API_V4L
diff --git a/libavdevice/avdevice.h b/libavdevice/avdevice.h
index a31c080f9c..be56be48d7 100644
--- a/libavdevice/avdevice.h
+++ b/libavdevice/avdevice.h
@@ -23,7 +23,7 @@
 #include "libavformat/avformat.h"
 
 #define LIBAVDEVICE_VERSION_MAJOR 53
-#define LIBAVDEVICE_VERSION_MINOR  0
+#define LIBAVDEVICE_VERSION_MINOR  1
 #define LIBAVDEVICE_VERSION_MICRO  0
 
 #define LIBAVDEVICE_VERSION_INT AV_VERSION_INT(LIBAVDEVICE_VERSION_MAJOR, \
diff --git a/libavdevice/sdl.c b/libavdevice/sdl.c
new file mode 100644
index 0000000000..07f60cd648
--- /dev/null
+++ b/libavdevice/sdl.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2011 Stefano Sabatini
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * libSDL output device
+ */
+
+#include <SDL.h>
+#include "libavutil/avstring.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+#include "avdevice.h"
+
+typedef struct {
+    AVClass *class;
+    SDL_Surface *surface;
+    SDL_Overlay *overlay;
+    char *window_title;
+    char *icon_title;
+    char *window_size;
+    int window_width, window_height;
+    int overlay_width, overlay_height;
+    int overlay_fmt;
+    int sdl_was_already_inited;
+} SDLContext;
+
+struct sdl_overlay_pix_fmt_entry {
+    enum PixelFormat pix_fmt; int overlay_fmt;
+} sdl_overlay_pix_fmt_map[] = {
+    { PIX_FMT_YUV420P, SDL_IYUV_OVERLAY },
+    { PIX_FMT_YUYV422, SDL_YUY2_OVERLAY },
+    { PIX_FMT_UYVY422, SDL_UYVY_OVERLAY },
+    { PIX_FMT_NONE,    0                },
+};
+
+static int sdl_write_trailer(AVFormatContext *s)
+{
+    SDLContext *sdl = s->priv_data;
+
+    av_freep(&sdl->window_title);
+    av_freep(&sdl->icon_title);
+    av_freep(&sdl->window_size);
+
+    if (sdl->overlay) {
+        SDL_FreeYUVOverlay(sdl->overlay);
+        sdl->overlay = NULL;
+    }
+    if (!sdl->sdl_was_already_inited)
+        SDL_Quit();
+
+    return 0;
+}
+
+static int sdl_write_header(AVFormatContext *s)
+{
+    SDLContext *sdl = s->priv_data;
+    AVStream *st = s->streams[0];
+    AVCodecContext *encctx = st->codec;
+    float sar, dar; /* sample and display aspect ratios */
+    int i, ret;
+
+    if (!sdl->icon_title)
+        sdl->icon_title = av_strdup(sdl->window_title);
+
+    if (SDL_WasInit(SDL_INIT_VIDEO)) {
+        av_log(s, AV_LOG_ERROR,
+               "SDL video subsystem was already inited, aborting.\n");
+        sdl->sdl_was_already_inited = 1;
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    if (SDL_Init(SDL_INIT_VIDEO) != 0) {
+        av_log(s, AV_LOG_ERROR, "Unable to initialize SDL: %s\n", SDL_GetError());
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    if (   s->nb_streams > 1
+        || encctx->codec_type != AVMEDIA_TYPE_VIDEO
+        || encctx->codec_id   != CODEC_ID_RAWVIDEO) {
+        av_log(s, AV_LOG_ERROR, "Only supports one rawvideo stream\n");
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    for (i = 0; sdl_overlay_pix_fmt_map[i].pix_fmt != PIX_FMT_NONE; i++) {
+        if (sdl_overlay_pix_fmt_map[i].pix_fmt == encctx->pix_fmt) {
+            sdl->overlay_fmt = sdl_overlay_pix_fmt_map[i].overlay_fmt;
+            break;
+        }
+    }
+
+    if (!sdl->overlay_fmt) {
+        av_log(s, AV_LOG_ERROR,
+               "Unsupported pixel format '%s', choose one of yuv420p, yuyv422, or uyvy422.\n",
+               av_get_pix_fmt_name(encctx->pix_fmt));
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    if (sdl->window_size) {
+        if (av_parse_video_size(&sdl->window_width, &sdl->window_height,
+                                sdl->window_size) < 0) {
+            av_log(s, AV_LOG_ERROR, "Invalid window size '%s'\n", sdl->window_size);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+    }
+
+    /* compute overlay width and height from the codec context information */
+    sar = st->sample_aspect_ratio.num ? av_q2d(st->sample_aspect_ratio) : 1;
+    dar = sar * (float)encctx->width / (float)encctx->height;
+
+    /* we suppose the screen has a 1/1 sample aspect ratio */
+    sdl->overlay_height = encctx->height;
+    sdl->overlay_width = ((int)rint(sdl->overlay_height * dar));
+    if (sdl->overlay_width > encctx->width) {
+        sdl->overlay_width = encctx->width;
+        sdl->overlay_height = ((int)rint(sdl->overlay_width / dar));
+    }
+
+    if (!sdl->window_width || !sdl->window_height) {
+        sdl->window_width  = sdl->overlay_width;
+        sdl->window_height = sdl->overlay_height;
+    }
+
+    SDL_WM_SetCaption(sdl->window_title, sdl->icon_title);
+    sdl->surface = SDL_SetVideoMode(sdl->window_width, sdl->window_height,
+                                    24, SDL_SWSURFACE);
+    if (!sdl->surface) {
+        av_log(s, AV_LOG_ERROR, "Unable to set video mode: %s\n", SDL_GetError());
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    sdl->overlay = SDL_CreateYUVOverlay(sdl->overlay_width, sdl->overlay_height,
+                                        sdl->overlay_fmt, sdl->surface);
+    if (!sdl->overlay || sdl->overlay->pitches[0] < sdl->overlay_width) {
+        av_log(s, AV_LOG_ERROR,
+               "SDL does not support an overlay with size of %dx%d pixels.\n",
+               sdl->overlay_width, sdl->overlay_height);
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    av_log(s, AV_LOG_INFO, "w:%d h:%d fmt:%s sar:%f -> w:%d h:%d\n",
+           encctx->width, encctx->height, av_get_pix_fmt_name(encctx->pix_fmt), sar,
+           sdl->window_width, sdl->window_height);
+    return 0;
+
+fail:
+    sdl_write_trailer(s);
+    return ret;
+}
+
+static int sdl_write_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    SDLContext *sdl = s->priv_data;
+    AVCodecContext *encctx = s->streams[0]->codec;
+    SDL_Rect rect = { 0, 0, sdl->window_width, sdl->window_height };
+    AVPicture pict;
+    int i;
+
+    avpicture_fill(&pict, pkt->data, encctx->pix_fmt, encctx->width, encctx->height);
+
+    SDL_FillRect(sdl->surface, &sdl->surface->clip_rect,
+                 SDL_MapRGB(sdl->surface->format, 0, 0, 0));
+    SDL_LockYUVOverlay(sdl->overlay);
+    for (i = 0; i < 3; i++) {
+        sdl->overlay->pixels [i] = pict.data    [i];
+        sdl->overlay->pitches[i] = pict.linesize[i];
+    }
+    SDL_DisplayYUVOverlay(sdl->overlay, &rect);
+    SDL_UnlockYUVOverlay(sdl->overlay);
+
+    SDL_UpdateRect(sdl->surface, 0, 0, sdl->overlay_width, sdl->overlay_height);
+
+    return 0;
+}
+
+#define OFFSET(x) offsetof(SDLContext,x)
+
+static const AVOption options[] = {
+    { "window_title", "SDL window title",           OFFSET(window_title),  FF_OPT_TYPE_STRING, {.str = "SDL video outdev" }, 0,  0, AV_OPT_FLAG_ENCODING_PARAM },
+    { "icon_title",   "SDL iconified window title", OFFSET(icon_title)  ,  FF_OPT_TYPE_STRING, {.str = NULL },               0,  0, AV_OPT_FLAG_ENCODING_PARAM },
+    { "window_size",  "SDL window forced size",     OFFSET(window_size) ,  FF_OPT_TYPE_STRING, {.str = NULL },               0,  0, AV_OPT_FLAG_ENCODING_PARAM },
+    { NULL },
+};
+
+static const AVClass sdl_class = {
+    .class_name = "sdl outdev",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVOutputFormat ff_sdl_muxer = {
+    .name           = "sdl",
+    .long_name      = NULL_IF_CONFIG_SMALL("SDL output device"),
+    .priv_data_size = sizeof(SDLContext),
+    .audio_codec    = CODEC_ID_NONE,
+    .video_codec    = CODEC_ID_RAWVIDEO,
+    .write_header   = sdl_write_header,
+    .write_packet   = sdl_write_packet,
+    .write_trailer  = sdl_write_trailer,
+    .flags          = AVFMT_NOFILE,
+    .priv_class     = &sdl_class,
+};

From edfa89b2608889626bb6c6b177283b6fd16716cf Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 28 May 2011 18:36:07 +0100
Subject: [PATCH 425/830] ARM: unbreak build

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index a5abfdd128..a5a5dfab64 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -1,5 +1,4 @@
 OBJS-$(CONFIG_AC3DSP)                  += arm/ac3dsp_init_arm.o         \
-                                          arm/ac3dsp_arm.o
 
 OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \
 

From dc0ad40de2b0d6995eb842e56b22f9096bd539ff Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Sat, 28 May 2011 14:40:16 -0400
Subject: [PATCH 426/830] ac3dsp: fix loop condition in
 ac3_update_bap_counts_c()

---
 libavcodec/ac3dsp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index de58f3ab26..8ce5f8d2c5 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -131,7 +131,7 @@ static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
 static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap,
                                     int len)
 {
-    while (len-- >= 0)
+    while (len-- > 0)
         mant_cnt[bap[len]]++;
 }
 

From f8f3f6c40bb452bb716096f455391f3fba324190 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 28 May 2011 20:38:06 +0200
Subject: [PATCH 427/830] v4l2: use OFFSET macro when setting options

Improve readability.
---
 libavdevice/v4l2.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 830fe0c6e9..d06ec29a87 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -715,9 +715,10 @@ static int v4l2_read_close(AVFormatContext *s1)
 
 #define OFFSET(x) offsetof(struct video_data, x)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
+
 static const AVOption options[] = {
-    { "standard", "", offsetof(struct video_data, standard), FF_OPT_TYPE_STRING, {.str = "NTSC" }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
-    { "channel",  "", offsetof(struct video_data, channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
+    { "standard", "", OFFSET(standard), FF_OPT_TYPE_STRING, {.str = "NTSC" }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
+    { "channel",  "", OFFSET(channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { NULL },
 };

From af344a69f3a8240b508183586617fba6af698bb2 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 28 May 2011 20:47:17 +0200
Subject: [PATCH 428/830] v4l2: set default standard to NULL

Avoid a failure with the default value of "NTSC". Not all drivers
support a standard.
---
 libavdevice/v4l2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index d06ec29a87..deffa92280 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -717,7 +717,7 @@ static int v4l2_read_close(AVFormatContext *s1)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 
 static const AVOption options[] = {
-    { "standard", "", OFFSET(standard), FF_OPT_TYPE_STRING, {.str = "NTSC" }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
+    { "standard", "", OFFSET(standard), FF_OPT_TYPE_STRING, {.str = NULL }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
     { "channel",  "", OFFSET(channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { NULL },

From 72c60f3ecdd8a9e1760c3929376de3f17ab9e74a Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 28 May 2011 20:58:32 +0200
Subject: [PATCH 429/830] v4l2: remove pointless empty lines

---
 libavdevice/v4l2.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index deffa92280..868328e359 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -117,7 +117,6 @@ static int device_open(AVFormatContext *ctx, uint32_t *capabilities)
     if (fd < 0) {
         av_log(ctx, AV_LOG_ERROR, "Cannot open video device %s : %s\n",
                  ctx->filename, strerror(errno));
-
         return AVERROR(errno);
     }
 
@@ -133,13 +132,11 @@ static int device_open(AVFormatContext *ctx, uint32_t *capabilities)
         av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_QUERYCAP): %s\n",
                  strerror(errno));
         close(fd);
-
         return AVERROR(err);
     }
     if ((cap.capabilities & V4L2_CAP_VIDEO_CAPTURE) == 0) {
         av_log(ctx, AV_LOG_ERROR, "Not a video capture device\n");
         close(fd);
-
         return AVERROR(ENODEV);
     }
     *capabilities = cap.capabilities;
@@ -251,27 +248,23 @@ static int mmap_init(AVFormatContext *ctx)
         } else {
             av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_REQBUFS)\n");
         }
-
         return AVERROR(errno);
     }
 
     if (req.count < 2) {
         av_log(ctx, AV_LOG_ERROR, "Insufficient buffer memory\n");
-
         return AVERROR(ENOMEM);
     }
     s->buffers = req.count;
     s->buf_start = av_malloc(sizeof(void *) * s->buffers);
     if (s->buf_start == NULL) {
         av_log(ctx, AV_LOG_ERROR, "Cannot allocate buffer pointers\n");
-
         return AVERROR(ENOMEM);
     }
     s->buf_len = av_malloc(sizeof(unsigned int) * s->buffers);
     if (s->buf_len == NULL) {
         av_log(ctx, AV_LOG_ERROR, "Cannot allocate buffer sizes\n");
         av_free(s->buf_start);
-
         return AVERROR(ENOMEM);
     }
 
@@ -285,7 +278,6 @@ static int mmap_init(AVFormatContext *ctx)
         res = ioctl(s->fd, VIDIOC_QUERYBUF, &buf);
         if (res < 0) {
             av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_QUERYBUF)\n");
-
             return AVERROR(errno);
         }
 
@@ -299,7 +291,6 @@ static int mmap_init(AVFormatContext *ctx)
                         PROT_READ | PROT_WRITE, MAP_SHARED, s->fd, buf.m.offset);
         if (s->buf_start[i] == MAP_FAILED) {
             av_log(ctx, AV_LOG_ERROR, "mmap: %s\n", strerror(errno));
-
             return AVERROR(errno);
         }
     }
@@ -353,7 +344,6 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
     if (res < 0) {
         if (errno == EAGAIN) {
             pkt->size = 0;
-
             return AVERROR(EAGAIN);
         }
         av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_DQBUF): %s\n", strerror(errno));
@@ -363,7 +353,6 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
     assert (buf.index < s->buffers);
     if (s->frame_size > 0 && buf.bytesused != s->frame_size) {
         av_log(ctx, AV_LOG_ERROR, "The v4l2 frame is %d bytes, but %d bytes are expected\n", buf.bytesused, s->frame_size);
-
         return AVERROR_INVALIDDATA;
     }
 
@@ -411,7 +400,6 @@ static int mmap_start(AVFormatContext *ctx)
         res = ioctl(s->fd, VIDIOC_QBUF, &buf);
         if (res < 0) {
             av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_QBUF): %s\n", strerror(errno));
-
             return AVERROR(errno);
         }
     }
@@ -420,7 +408,6 @@ static int mmap_start(AVFormatContext *ctx)
     res = ioctl(s->fd, VIDIOC_STREAMON, &type);
     if (res < 0) {
         av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_STREAMON): %s\n", strerror(errno));
-
         return AVERROR(errno);
     }
 
@@ -654,7 +641,6 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     }
     if (res < 0) {
         close(s->fd);
-
         res = AVERROR(EIO);
         goto out;
     }

From 895e4de8d5a0760a48ba968546693d9094a62013 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 28 May 2011 20:59:27 +0200
Subject: [PATCH 430/830] v4l2: create file @doxy from text in the copyright
 header

---
 libavdevice/v4l2.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 868328e359..29ca01e7b5 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -1,15 +1,7 @@
 /*
- * Video4Linux2 grab interface
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2006 Luca Abeni
  *
- * Part of this file is based on the V4L2 video capture example
- * (http://v4l2spec.bytesex.org/v4l2spec/capture.c)
- *
- * Thanks to Michael Niedermayer for providing the mapping between
- * V4L2_PIX_FMT_* and PIX_FMT_*
- *
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -27,6 +19,17 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+/**
+ * @file
+ * Video4Linux2 grab interface
+ *
+ * Part of this file is based on the V4L2 video capture example
+ * (http://v4l2spec.bytesex.org/v4l2spec/capture.c)
+ *
+ * Thanks to Michael Niedermayer for providing the mapping between
+ * V4L2_PIX_FMT_* and PIX_FMT_*
+ */
+
 #undef __STRICT_ANSI__ //workaround due to broken kernel headers
 #include "config.h"
 #include <unistd.h>

From 485d73ef215323ead6cb58a83853d1f6b52ede54 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 26 May 2011 09:15:38 -0400
Subject: [PATCH 431/830] swscale: replace formatConvBuffer[VOF] by allocated
 array.

This allows converting between formats of arbitrary width,
regardless of the value of VOF/VOFW.
---
 libswscale/swscale_internal.h | 2 +-
 libswscale/swscale_template.c | 5 +++--
 libswscale/utils.c            | 2 ++
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 70eec6219b..9f656c9af2 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -118,7 +118,7 @@ typedef struct SwsContext {
     int       chrBufIndex;        ///< Index in ring buffer of the last scaled horizontal chroma     line from source.
     //@}
 
-    uint8_t formatConvBuffer[VOF]; //FIXME dynamic allocation, but we have to change a lot of code for this to be useful
+    uint8_t *formatConvBuffer;
 
     /**
      * @name Horizontal and vertical filters.
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 586614f989..2957aa4719 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -490,9 +490,10 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
     src2 += c->chrSrcOffset;
 
     if (c->chrToYV12) {
-        c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
+        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
         src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
+        src2= buf2;
     }
 
     if (c->hScale16) {
diff --git a/libswscale/utils.c b/libswscale/utils.c
index eba7f82d9e..b0548dcf8e 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -798,6 +798,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
         return AVERROR(EINVAL);
     }
+    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
 
     if (!dstFilter) dstFilter= &dummyFilter;
     if (!srcFilter) srcFilter= &dummyFilter;
@@ -1522,6 +1523,7 @@ void sws_freeContext(SwsContext *c)
 #endif /* HAVE_MMX */
 
     av_freep(&c->yuvTable);
+    av_freep(&c->formatConvBuffer);
 
     av_free(c);
 }

From 9222dddb448789b138a106ed677f3c628c85a840 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 25 May 2011 15:28:12 -0400
Subject: [PATCH 432/830] swscale: use av_clip_uint8() in yuv2yuv1_c().

---
 libswscale/swscale_template.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 2957aa4719..7078a99b18 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -49,29 +49,15 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
     int i;
     for (i=0; i<dstW; i++) {
         int val= (lumSrc[i]+64)>>7;
-
-        if (val&256) {
-            if (val<0) val=0;
-            else       val=255;
-        }
-
-        dest[i]= val;
+        dest[i]= av_clip_uint8(val);
     }
 
     if (uDest)
         for (i=0; i<chrDstW; i++) {
             int u=(chrSrc[i       ]+64)>>7;
             int v=(chrSrc[i + VOFW]+64)>>7;
-
-            if ((u|v)&256) {
-                if (u<0)        u=0;
-                else if (u>255) u=255;
-                if (v<0)        v=0;
-                else if (v>255) v=255;
-            }
-
-            uDest[i]= u;
-            vDest[i]= v;
+            uDest[i]= av_clip_uint8(u);
+            vDest[i]= av_clip_uint8(v);
         }
 
     if (CONFIG_SWSCALE_ALPHA && aDest)

From 70bb747a57d8df6f33803bb4824b0a447c708823 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Sat, 28 May 2011 14:39:18 -0400
Subject: [PATCH 433/830] ac3dsp: do not use the ff_* prefix when referencing
 ff_ac3_bap_bits.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This should fix the Windows builds.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavcodec/x86/ac3dsp.asm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 0d8f4b78eb..6892ec2765 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -324,8 +324,8 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
     paddw       m1, [mant_cntq+ 9*16]
     paddw       m0, [mant_cntq+10*16]
     paddw       m1, [mant_cntq+11*16]
-    pmaddwd     m0, [ff_ac3_bap_bits   ]
-    pmaddwd     m1, [ff_ac3_bap_bits+16]
+    pmaddwd     m0, [ac3_bap_bits   ]
+    pmaddwd     m1, [ac3_bap_bits+16]
     paddd       m0, m1
     PHADDD4     m0, m1
     movd      sumd, m0

From 90da52f01f8b6c22af22a002eb226989b1cf7ef8 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 28 May 2011 20:27:09 +0100
Subject: [PATCH 434/830] ac3enc: fix LOCAL_ALIGNED usage in
 count_mantissa_bits()

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/ac3enc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 66dfc29217..dbe7784eae 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -1519,7 +1519,7 @@ static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch,
 static int count_mantissa_bits(AC3EncodeContext *s)
 {
     int ch, max_end_freq;
-    LOCAL_ALIGNED_16(uint16_t, mant_cnt,[AC3_MAX_BLOCKS][16]);
+    LOCAL_ALIGNED_16(uint16_t, mant_cnt, [AC3_MAX_BLOCKS], [16]);
 
     count_mantissa_bits_init(mant_cnt);
 

From 011b098cd0ae7296651d36ae7da400d5d04337c3 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 27 May 2011 16:31:59 -0700
Subject: [PATCH 435/830] doc: correct AC-3 option subsection placement

The Floating-Point-Only section was added after the video encoders chapter
in the 034fc7b merge.
---
 doc/encoders.texi | 88 +++++++++++++++++++++++------------------------
 1 file changed, 43 insertions(+), 45 deletions(-)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 9b5c2af008..f21f8ffd82 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -369,6 +369,49 @@ is highly recommended that it be left as enabled except for testing purposes.
 
 @end table
 
+@subsection Floating-Point-Only AC-3 Encoding Options
+
+These options are only valid for the floating-point encoder and do not exist
+for the fixed-point encoder due to the corresponding features not being
+implemented in fixed-point.
+
+@table @option
+
+@item -channel_coupling @var{boolean}
+Enables/Disables use of channel coupling, which is an optional AC-3 feature
+that increases quality by combining high frequency information from multiple
+channels into a single channel. The per-channel high frequency information is
+sent with less accuracy in both the frequency and time domains. This allows
+more bits to be used for lower frequencies while preserving enough information
+to reconstruct the high frequencies. This option is enabled by default for the
+floating-point encoder and should generally be left as enabled except for
+testing purposes or to increase encoding speed.
+@table @option
+@item -1
+@itemx auto
+Selected by Encoder (default)
+@item 0
+@itemx off
+Disable Channel Coupling
+@item 1
+@itemx on
+Enable Channel Coupling
+@end table
+
+@item -cpl_start_band @var{number}
+Coupling Start Band. Sets the channel coupling start band, from 1 to 15. If a
+value higher than the bandwidth is used, it will be reduced to 1 less than the
+coupling end band. If @var{auto} is used, the start band will be determined by
+the encoder based on the bit rate, sample rate, and channel layout. This option
+has no effect if channel coupling is disabled.
+@table @option
+@item -1
+@itemx auto
+Selected by Encoder (default)
+@end table
+
+@end table
+
 @c man end AUDIO ENCODERS
 
 @chapter Video Encoders
@@ -433,48 +476,3 @@ For more information about libx264 and the supported options see:
 @url{http://www.videolan.org/developers/x264.html}
 
 @c man end VIDEO ENCODERS
-
-@subheading Floating-Point-Only AC-3 Encoding Options
-
-These options are only valid for the floating-point encoder and do not exist
-for the fixed-point encoder due to the corresponding features not being
-implemented in fixed-point.
-
-@table @option
-
-@item -channel_coupling @var{boolean}
-Enables/Disables use of channel coupling, which is an optional AC-3 feature
-that increases quality by combining high frequency information from multiple
-channels into a single channel. The per-channel high frequency information is
-sent with less accuracy in both the frequency and time domains. This allows
-more bits to be used for lower frequencies while preserving enough information
-to reconstruct the high frequencies. This option is enabled by default for the
-floating-point encoder and should generally be left as enabled except for
-testing purposes or to increase encoding speed.
-@table @option
-@item -1
-@itemx auto
-Selected by Encoder (default)
-@item 0
-@itemx off
-Disable Channel Coupling
-@item 1
-@itemx on
-Enable Channel Coupling
-@end table
-
-@item -cpl_start_band @var{number}
-Coupling Start Band. Sets the channel coupling start band, from 1 to 15. If a
-value higher than the bandwidth is used, it will be reduced to 1 less than the
-coupling end band. If @var{auto} is used, the start band will be determined by
-the encoder based on the bit rate, sample rate, and channel layout. This option
-has no effect if channel coupling is disabled.
-@table @option
-@item -1
-@itemx auto
-Selected by Encoder (default)
-@end table
-
-@end table
-
-@c man end AUDIO ENCODERS

From ea535ed50d1b8d751e2d194a987295ab38daf1a2 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 29 May 2011 00:11:39 +0200
Subject: [PATCH 436/830] Revert 1a5e4fd8c5b99478b4e08a69261930bb12aa948b for
 postproc. This broke the code

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libpostproc/postprocess.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index 03e5f194d7..b2c35f537e 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -86,7 +86,6 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
 //#define DEBUG_BRIGHTNESS
 #include "postprocess.h"
 #include "postprocess_internal.h"
-#include "libavutil/avstring.h"
 
 unsigned postproc_version(void)
 {
@@ -767,7 +766,8 @@ pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
     ppMode->maxClippedThreshold= 0.01;
     ppMode->error=0;
 
-    av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE);
+#undef strncpy
+    strncpy(temp, name, GET_MODE_BUFFER_SIZE);
 
     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 

From 986f0d86cbdc92f46e5fbba05fb29526b76162be Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 28 May 2011 15:52:50 +0200
Subject: [PATCH 437/830] Commits that could not be pulled earlier due to bugs.

    commit 93681fbd5082a3af896b7a730dacdd27a3052406
    Author: Ronald S. Bultje <rsbultje@gmail.com>
    Date:   Thu May 26 11:32:32 2011 -0400

        swscale: fix compile on ppc.

    commit e758573a887cfb1155e81499ca54f433127cf24e
    Author: Ronald S. Bultje <rsbultje@gmail.com>
    Date:   Thu May 26 10:36:47 2011 -0400

        swscale: fix compile on x86-32.

    commit 0f4eb8b04341081591bf401eaa2c07d6bc3ff52e
    Author: Ronald S. Bultje <rsbultje@gmail.com>
    Date:   Thu May 26 09:17:52 2011 -0400

        swscale: remove VOF/VOFW.

    commit b4a224c5e4109cb2cca8bac38628673d685fe763
    Author: Ronald S. Bultje <rsbultje@gmail.com>
    Date:   Wed May 25 14:30:09 2011 -0400

        swscale: split chroma buffers into separate U/V planes.

        Preparatory step to implement support for sizes > VOFW.
---
 libswscale/ppc/swscale_altivec_template.c |  30 +-
 libswscale/ppc/swscale_template.c         |  25 +-
 libswscale/ppc/yuv2rgb_altivec.c          |  17 +-
 libswscale/swscale.c                      |  87 +++--
 libswscale/swscale_internal.h             |  38 ++-
 libswscale/swscale_template.c             | 134 ++++----
 libswscale/utils.c                        |  30 +-
 libswscale/x86/swscale_template.c         | 384 +++++++++++++---------
 8 files changed, 430 insertions(+), 315 deletions(-)

diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec_template.c
index c7aa0fd2e6..d142c62e61 100644
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec_template.c
@@ -86,9 +86,11 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
 }
 
 static inline void
-yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                      const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                      uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
+yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc,
+                      int lumFilterSize, const int16_t *chrFilter,
+                      const int16_t **chrUSrc, const int16_t **chrVSrc,
+                      int chrFilterSize, uint8_t *dest, uint8_t *uDest,
+                      uint8_t *vDest, int dstW, int chrDstW)
 {
     const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
     register int i, j;
@@ -159,22 +161,22 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
             vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
             vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter
 
-            perm = vec_lvsl(0, chrSrc[j]);
-            l1 = vec_ld(0, chrSrc[j]);
-            l1_V = vec_ld(VOFW << 1, chrSrc[j]);
+            perm = vec_lvsl(0, chrUSrc[j]);
+            l1 = vec_ld(0, chrUSrc[j]);
+            l1_V = vec_ld(0, chrVSrc[j]);
 
             for (i = 0; i < (chrDstW - 7); i+=8) {
                 int offset = i << 2;
-                vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]);
-                vector signed short l2_V = vec_ld(((i + VOFW) << 1) + 16, chrSrc[j]);
+                vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]);
+                vector signed short l2_V = vec_ld((i << 1) + 16, chrVSrc[j]);
 
                 vector signed int v1 = vec_ld(offset, u);
                 vector signed int v2 = vec_ld(offset + 16, u);
                 vector signed int v1_V = vec_ld(offset, v);
                 vector signed int v2_V = vec_ld(offset + 16, v);
 
-                vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7]
-                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+VOFW] ... chrSrc[j][i+2055]
+                vector signed short ls = vec_perm(l1, l2, perm); // chrUSrc[j][i] ... chrUSrc[j][i+7]
+                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrVSrc[j][i] ... chrVSrc[j][i+7]
 
                 vector signed int i1 = vec_mule(vChrFilter, ls);
                 vector signed int i2 = vec_mulo(vChrFilter, ls);
@@ -182,9 +184,9 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
                 vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
 
                 vector signed int vf1 = vec_mergeh(i1, i2);
-                vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
+                vector signed int vf2 = vec_mergel(i1, i2); // chrUSrc[j][i] * chrFilter[j] ... chrUSrc[j][i+7] * chrFilter[j]
                 vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
-                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
+                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j]
 
                 vector signed int vo1 = vec_add(v1, vf1);
                 vector signed int vo2 = vec_add(v2, vf2);
@@ -200,8 +202,8 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
                 l1_V = l2_V;
             }
             for ( ; i < chrDstW; i++) {
-                u[i] += chrSrc[j][i] * chrFilter[j];
-                v[i] += chrSrc[j][i + VOFW] * chrFilter[j];
+                u[i] += chrUSrc[j][i] * chrFilter[j];
+                v[i] += chrVSrc[j][i] * chrFilter[j];
             }
         }
         altivec_packIntArrayToCharArray(u, uDest, chrDstW);
diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
index 617b14cb1e..7eb2e3e28a 100644
--- a/libswscale/ppc/swscale_template.c
+++ b/libswscale/ppc/swscale_template.c
@@ -24,21 +24,28 @@
 #endif
 
 #if COMPILE_TEMPLATE_ALTIVEC
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
+static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc, int chrFilterSize,
+                                    const int16_t **alpSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                    uint8_t *aDest, long dstW, long chrDstW)
 {
     yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
-                          chrFilter, chrSrc, chrFilterSize,
+                          chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                           dest, uDest, vDest, dstW, chrDstW);
 }
 
 /**
  * vertical scale YV12 to RGB
  */
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
+                                       const int16_t **lumSrc, int lumFilterSize,
+                                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                                       const int16_t **chrVSrc, int chrFilterSize,
+                                       const int16_t **alpSrc, uint8_t *dest,
+                                       long dstW, long dstY)
 {
     /* The following list of supported dstFormat values should
        match what's found in the body of ff_yuv2packedX_altivec() */
@@ -47,11 +54,11 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
           c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
           c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
             ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
-                                   chrFilter, chrSrc, chrFilterSize,
+                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                                    dest, dstW, dstY);
     else
         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                       chrFilter, chrSrc, chrFilterSize,
+                       chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                        alpSrc, dest, dstW, dstY);
 }
 #endif
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 09b72ae846..45d4ca7347 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -778,10 +778,11 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b
 
 
 void
-ff_yuv2packedX_altivec(SwsContext *c,
-                       const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
-                     uint8_t *dest, int dstW, int dstY)
+ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
+                       const int16_t **lumSrc, int lumFilterSize,
+                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                       const int16_t **chrVSrc, int chrFilterSize,
+                       uint8_t *dest, int dstW, int dstY)
 {
     int i,j;
     vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
@@ -816,9 +817,9 @@ ff_yuv2packedX_altivec(SwsContext *c,
         V = RND;
         /* extract 8 coeffs from U,V */
         for (j=0; j<chrFilterSize; j++) {
-            X  = vec_ld (0, &chrSrc[j][i/2]);
+            X  = vec_ld (0, &chrUSrc[j][i/2]);
             U  = vec_mradds (X, CCoeffs[j], U);
-            X  = vec_ld (0, &chrSrc[j][i/2+VOFW]);
+            X  = vec_ld (0, &chrVSrc[j][i/2]);
             V  = vec_mradds (X, CCoeffs[j], V);
         }
 
@@ -894,9 +895,9 @@ ff_yuv2packedX_altivec(SwsContext *c,
         V = RND;
         /* extract 8 coeffs from U,V */
         for (j=0; j<chrFilterSize; j++) {
-            X  = vec_ld (0, &chrSrc[j][i/2]);
+            X  = vec_ld (0, &chrUSrc[j][i/2]);
             U  = vec_mradds (X, CCoeffs[j], U);
-            X  = vec_ld (0, &chrSrc[j][i/2+VOFW]);
+            X  = vec_ld (0, &chrVSrc[j][i/2]);
             V  = vec_mradds (X, CCoeffs[j], V);
         }
 
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 00941c9dfc..be8491e217 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -301,7 +301,8 @@ uint16_t dither_scale[15][16]={
 };
 
 static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                                    const int16_t **chrVSrc, int chrFilterSize,
                                                     const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest,
                                                     int dstW, int chrDstW, int big_endian, int output_bits)
 {
@@ -340,8 +341,8 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
             int j;
 
             for (j = 0; j < chrFilterSize; j++) {
-                u += chrSrc[j][i       ] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             output_pixel(&uDest[i], u);
@@ -362,28 +363,50 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
     }
 }
 
+#define yuv2NBPS(bits, BE_LE, is_be) \
+static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \
+                              const int16_t **lumSrc, int lumFilterSize, \
+                              const int16_t *chrFilter, const int16_t **chrUSrc, \
+                              const int16_t **chrVSrc, \
+                              int chrFilterSize, const int16_t **alpSrc, \
+                              uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \
+                              uint16_t *aDest, int dstW, int chrDstW) \
+{ \
+    yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \
+                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                           alpSrc, \
+                           dest, uDest, vDest, aDest, \
+                           dstW, chrDstW, is_be, bits); \
+}
+yuv2NBPS( 9, BE, 1);
+yuv2NBPS( 9, LE, 0);
+yuv2NBPS(10, BE, 1);
+yuv2NBPS(10, LE, 0);
+yuv2NBPS(16, BE, 1);
+yuv2NBPS(16, LE, 0);
+
 static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
                                  const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
                                  enum PixelFormat dstFormat)
 {
     if (isNBPS(dstFormat)) {
         const int depth = av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1+1;
         yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
-                              chrFilter, chrSrc, chrFilterSize,
+                              chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                               alpSrc,
                               dest, uDest, vDest, aDest,
                               dstW, chrDstW, isBE(dstFormat), depth);
     } else {
         if (isBE(dstFormat)) {
             yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
-                                   chrFilter, chrSrc, chrFilterSize,
+                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                                    alpSrc,
                                    dest, uDest, vDest, aDest,
                                    dstW, chrDstW, 1, 16);
         } else {
             yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
-                                   chrFilter, chrSrc, chrFilterSize,
+                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                                    alpSrc,
                                    dest, uDest, vDest, aDest,
                                    dstW, chrDstW, 0, 16);
@@ -392,7 +415,8 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr
 }
 
 static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                               const int16_t *chrFilter, const int16_t **chrUSrc,
+                               const int16_t **chrVSrc, int chrFilterSize,
                                const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
 {
     //FIXME Optimize (just quickly written not optimized..)
@@ -412,8 +436,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
             int v=1<<18;
             int j;
             for (j=0; j<chrFilterSize; j++) {
-                u += chrSrc[j][i] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             uDest[i]= av_clip_uint8(u>>19);
@@ -433,7 +457,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
 }
 
 static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc, int chrFilterSize,
                                 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
 {
     //FIXME Optimize (just quickly written not optimized..)
@@ -456,8 +481,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             int v=1<<18;
             int j;
             for (j=0; j<chrFilterSize; j++) {
-                u += chrSrc[j][i] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             uDest[2*i]= av_clip_uint8(u>>19);
@@ -469,8 +494,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             int v=1<<18;
             int j;
             for (j=0; j<chrFilterSize; j++) {
-                u += chrSrc[j][i] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
+                u += chrUSrc[j][i] * chrFilter[j];
+                v += chrVSrc[j][i] * chrFilter[j];
             }
 
             uDest[2*i]= av_clip_uint8(v>>19);
@@ -494,8 +519,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
         }\
         for (j=0; j<chrFilterSize; j++) {\
-            U += chrSrc[j][i] * chrFilter[j];\
-            V += chrSrc[j][i+VOFW] * chrFilter[j];\
+            U += chrUSrc[j][i] * chrFilter[j];\
+            V += chrVSrc[j][i] * chrFilter[j];\
         }\
         Y1>>=19;\
         Y2>>=19;\
@@ -542,8 +567,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
             Y += lumSrc[j][i     ] * lumFilter[j];\
         }\
         for (j=0; j<chrFilterSize; j++) {\
-            U += chrSrc[j][i     ] * chrFilter[j];\
-            V += chrSrc[j][i+VOFW] * chrFilter[j];\
+            U += chrUSrc[j][i] * chrFilter[j];\
+            V += chrVSrc[j][i] * chrFilter[j];\
         }\
         Y >>=10;\
         U >>=10;\
@@ -608,8 +633,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         const int i2= 2*i;       \
         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
-        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
-        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
+        int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;              \
+        int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;              \
         type av_unused *r, *b, *g;                                    \
         int av_unused A1, A2;                                         \
         if (alpha) {\
@@ -634,8 +659,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         const int i2= 2*i;\
         int Y1= buf0[i2  ]>>7;\
         int Y2= buf0[i2+1]>>7;\
-        int U= (uvbuf1[i     ])>>7;\
-        int V= (uvbuf1[i+VOFW])>>7;\
+        int U= (ubuf1[i])>>7;\
+        int V= (vbuf1[i])>>7;\
         type av_unused *r, *b, *g;\
         int av_unused A1, A2;\
         if (alpha) {\
@@ -660,8 +685,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         const int i2= 2*i;\
         int Y1= buf0[i2  ]>>7;\
         int Y2= buf0[i2+1]>>7;\
-        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
-        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
+        int U= (ubuf0[i] + ubuf1[i])>>8;\
+        int V= (vbuf0[i] + vbuf1[i])>>8;\
         type av_unused *r, *b, *g;\
         int av_unused A1, A2;\
         if (alpha) {\
@@ -943,16 +968,20 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         break;\
     }
 
-static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                  const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter,
+                                  const int16_t **lumSrc, int lumFilterSize,
+                                  const int16_t *chrFilter, const int16_t **chrUSrc,
+                                  const int16_t **chrVSrc, int chrFilterSize,
                                   const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
 }
 
-static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc, int chrFilterSize,
                                     const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 9f656c9af2..cac40b22f8 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -35,10 +35,6 @@
 
 #define MAX_FILTER_SIZE 256
 
-#define VOFW 21504
-
-#define VOF  (VOFW*2)
-
 #if HAVE_BIGENDIAN
 #define ALT32_CORR (-1)
 #else
@@ -108,7 +104,8 @@ typedef struct SwsContext {
      */
     //@{
     int16_t **lumPixBuf;          ///< Ring buffer for scaled horizontal luma   plane lines to be fed to the vertical scaler.
-    int16_t **chrPixBuf;          ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
+    int16_t **chrUPixBuf;         ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
+    int16_t **chrVPixBuf;         ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
     int16_t **alpPixBuf;          ///< Ring buffer for scaled horizontal alpha  plane lines to be fed to the vertical scaler.
     int       vLumBufSize;        ///< Number of vertical luma/alpha lines allocated in the ring buffer.
     int       vChrBufSize;        ///< Number of vertical chroma     lines allocated in the ring buffer.
@@ -196,6 +193,7 @@ typedef struct SwsContext {
 #define V_TEMP                "11*8+4*4*256*2+32"
 #define Y_TEMP                "11*8+4*4*256*2+40"
 #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
+#define UV_OFF                "11*8+4*4*256*3+48"
 
     DECLARE_ALIGNED(8, uint64_t, redDither);
     DECLARE_ALIGNED(8, uint64_t, greenDither);
@@ -218,6 +216,7 @@ typedef struct SwsContext {
     DECLARE_ALIGNED(8, uint64_t, v_temp);
     DECLARE_ALIGNED(8, uint64_t, y_temp);
     int32_t  alpMmxFilter[4*MAX_FILTER_SIZE];
+    DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
 
 #if HAVE_ALTIVEC
     vector signed short   CY;
@@ -251,36 +250,42 @@ typedef struct SwsContext {
     /* function pointers for swScale() */
     void (*yuv2nv12X  )(struct SwsContext *c,
                         const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                        const int16_t **chrVSrc, int chrFilterSize,
                         uint8_t *dest, uint8_t *uDest,
                         int dstW, int chrDstW, int dstFormat);
     void (*yuv2yuv1   )(struct SwsContext *c,
-                        const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
+                        const int16_t *lumSrc, const int16_t *chrUSrc,
+                        const int16_t *chrVSrc, const int16_t *alpSrc,
                         uint8_t *dest,
                         uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
                         long dstW, long chrDstW);
     void (*yuv2yuvX   )(struct SwsContext *c,
                         const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                        const int16_t **chrVSrc, int chrFilterSize,
                         const int16_t **alpSrc,
                         uint8_t *dest,
                         uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
                         long dstW, long chrDstW);
     void (*yuv2packed1)(struct SwsContext *c,
                         const uint16_t *buf0,
-                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                        const uint16_t *ubuf0, const uint16_t *ubuf1,
+                        const uint16_t *vbuf0, const uint16_t *vbuf1,
                         const uint16_t *abuf0,
                         uint8_t *dest,
                         int dstW, int uvalpha, int dstFormat, int flags, int y);
     void (*yuv2packed2)(struct SwsContext *c,
                         const uint16_t *buf0, const uint16_t *buf1,
-                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                        const uint16_t *ubuf0, const uint16_t *ubuf1,
+                        const uint16_t *vbuf0, const uint16_t *vbuf1,
                         const uint16_t *abuf0, const uint16_t *abuf1,
                         uint8_t *dest,
                         int dstW, int yalpha, int uvalpha, int y);
     void (*yuv2packedX)(struct SwsContext *c,
                         const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                        const int16_t **chrVSrc, int chrFilterSize,
                         const int16_t **alpSrc, uint8_t *dest,
                         long dstW, long dstY);
 
@@ -295,7 +300,7 @@ typedef struct SwsContext {
                          int16_t *dst, long dstWidth,
                          const uint8_t *src, int srcW, int xInc);
     void (*hcscale_fast)(struct SwsContext *c,
-                         int16_t *dst, long dstWidth,
+                         int16_t *dst1, int16_t *dst2, long dstWidth,
                          const uint8_t *src1, const uint8_t *src2,
                          int srcW, int xInc);
 
@@ -308,7 +313,7 @@ typedef struct SwsContext {
                    long filterSize, int shift);
 
     void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
-    void (*chrConvertRange)(int16_t *dst, int width); ///< Color range conversion function for chroma planes if needed.
+    void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.
 
     int lumSrcOffset; ///< Offset given to luma src pointers passed to horizontal input functions.
     int chrSrcOffset; ///< Offset given to chroma src pointers passed to horizontal input functions.
@@ -332,9 +337,10 @@ SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c);
 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c);
 SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c);
 void ff_bfin_get_unscaled_swscale(SwsContext *c);
-void ff_yuv2packedX_altivec(SwsContext *c,
-                            const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                            const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
+                            const int16_t **lumSrc, int lumFilterSize,
+                            const int16_t *chrFilter, const int16_t **chrUSrc,
+                            const int16_t **chrVSrc, int chrFilterSize,
                             uint8_t *dest, int dstW, int dstY);
 
 const char *sws_format_name(enum PixelFormat format);
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 7078a99b18..0f40f5b95e 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -20,29 +20,32 @@
 
 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                               const int16_t **lumSrc, int lumFilterSize,
-                              const int16_t *chrFilter, const int16_t **chrSrc,
+                              const int16_t *chrFilter, const int16_t **chrUSrc,
+                              const int16_t **chrVSrc,
                               int chrFilterSize, const int16_t **alpSrc,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                               uint8_t *aDest, long dstW, long chrDstW)
 {
     yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
-                chrFilter, chrSrc, chrFilterSize,
+                chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
 }
 
 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                                const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter, const int16_t **chrSrc,
+                               const int16_t *chrFilter, const int16_t **chrUSrc,
+                               const int16_t **chrVSrc,
                                int chrFilterSize, uint8_t *dest, uint8_t *uDest,
                                int dstW, int chrDstW, enum PixelFormat dstFormat)
 {
     yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
-                 chrFilter, chrSrc, chrFilterSize,
+                 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                  dest, uDest, dstW, chrDstW, dstFormat);
 }
 
 static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
-                              const int16_t *chrSrc, const int16_t *alpSrc,
+                              const int16_t *chrUSrc, const int16_t *chrVSrc,
+                              const int16_t *alpSrc,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                               uint8_t *aDest, long dstW, long chrDstW)
 {
@@ -54,8 +57,8 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
 
     if (uDest)
         for (i=0; i<chrDstW; i++) {
-            int u=(chrSrc[i       ]+64)>>7;
-            int v=(chrSrc[i + VOFW]+64)>>7;
+            int u=(chrUSrc[i]+64)>>7;
+            int v=(chrVSrc[i]+64)>>7;
             uDest[i]= av_clip_uint8(u);
             vDest[i]= av_clip_uint8(v);
         }
@@ -73,12 +76,13 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
  */
 static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                                  const int16_t **lumSrc, int lumFilterSize,
-                                 const int16_t *chrFilter, const int16_t **chrSrc,
+                                 const int16_t *chrFilter, const int16_t **chrUSrc,
+                                 const int16_t **chrVSrc,
                                  int chrFilterSize, const int16_t **alpSrc,
                                  uint8_t *dest, long dstW, long dstY)
 {
         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                       chrFilter, chrSrc, chrFilterSize,
+                       chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                        alpSrc, dest, dstW, dstY);
 }
 
@@ -86,8 +90,9 @@ static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
  * vertical bilinear scale YV12 to RGB
  */
 static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *buf1, const uint16_t *uvbuf0,
-                                 const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                 const uint16_t *buf1, const uint16_t *ubuf0,
+                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                 const uint16_t *vbuf1, const uint16_t *abuf0,
                                  const uint16_t *abuf1, uint8_t *dest, int dstW,
                                  int yalpha, int uvalpha, int y)
 {
@@ -102,7 +107,8 @@ static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
  * YV12 to RGB without scaling or interpolating
  */
 static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
                                  const uint16_t *abuf0, uint8_t *dest, int dstW,
                                  int uvalpha, enum PixelFormat dstFormat,
                                  int flags, int y)
@@ -373,20 +379,20 @@ static inline void hScale16X_c(int16_t *dst, int dstW, const uint16_t *src, int
 
 //FIXME all pal and rgb srcFormats could do this convertion as well
 //FIXME all scalers more complex than bilinear could do half of this transform
-static void chrRangeToJpeg_c(int16_t *dst, int width)
+static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
-        dst[i     ] = (FFMIN(dst[i     ],30775)*4663 - 9289992)>>12; //-264
-        dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
+        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
+        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
     }
 }
-static void chrRangeFromJpeg_c(int16_t *dst, int width)
+static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
-        dst[i     ] = (dst[i     ]*1799 + 4081085)>>11; //1469
-        dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
+        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
+        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
     }
 }
 static void lumRangeToJpeg_c(int16_t *dst, int width)
@@ -446,7 +452,7 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
         convertRange(dst, dstWidth);
 }
 
-static inline void hcscale_fast_c(SwsContext *c, int16_t *dst,
+static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                                   long dstWidth, const uint8_t *src1,
                                   const uint8_t *src2, int srcW, int xInc)
 {
@@ -455,17 +461,13 @@ static inline void hcscale_fast_c(SwsContext *c, int16_t *dst,
     for (i=0;i<dstWidth;i++) {
         register unsigned int xx=xpos>>16;
         register unsigned int xalpha=(xpos&0xFFFF)>>9;
-        dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
-        dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
-        /* slower
-        dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
-        dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
-        */
+        dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
+        dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
         xpos+=xInc;
     }
 }
 
-inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
+inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, long dstWidth,
                              const uint8_t *src1, const uint8_t *src2,
                              int srcW, int xInc, const int16_t *hChrFilter,
                              const int16_t *hChrFilterPos, int hChrFilterSize,
@@ -484,17 +486,17 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
 
     if (c->hScale16) {
         int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
-        c->hScale16(dst     , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
-        c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
+        c->hScale16(dst1, dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
+        c->hScale16(dst2, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
     } else if (!c->hcscale_fast) {
-        c->hScale(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-        c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
     } else { // fast bilinear upscale / crap downscale
-        c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
+        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
     }
 
     if (c->chrConvertRange)
-        c->chrConvertRange(dst, dstWidth);
+        c->chrConvertRange(dst1, dst2, dstWidth);
 }
 
 #define DEBUG_SWSCALE_BUFFERS 0
@@ -534,7 +536,8 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
     const int hLumFilterSize= c->hLumFilterSize;
     const int hChrFilterSize= c->hChrFilterSize;
     int16_t **lumPixBuf= c->lumPixBuf;
-    int16_t **chrPixBuf= c->chrPixBuf;
+    int16_t **chrUPixBuf= c->chrUPixBuf;
+    int16_t **chrVPixBuf= c->chrVPixBuf;
     int16_t **alpPixBuf= c->alpPixBuf;
     const int vLumBufSize= c->vLumBufSize;
     const int vChrBufSize= c->vChrBufSize;
@@ -662,10 +665,10 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
             //FIXME replace parameters through context struct (some at least)
 
             if (c->needs_hcscale)
-                hcscale_c(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
-                                hChrFilter, hChrFilterPos, hChrFilterSize,
-                                formatConvBuffer,
-                                pal);
+                hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
+                          chrDstW, src1, src2, chrSrcW, chrXInc,
+                          hChrFilter, hChrFilterPos, hChrFilterSize,
+                          formatConvBuffer, pal);
             lastInChrBuf++;
             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                                chrBufIndex,    lastInChrBuf);
@@ -681,47 +684,54 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
 #endif
         if (dstY < dstH-2) {
             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
             if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
                 c->yuv2nv12X(c,
                              vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                              dest, uDest, dstW, chrDstW, dstFormat);
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                 if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
-                    yuv2yuvX16inC(
-                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                    yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
+                                  chrVSrcPtr, vChrFilterSize,
+                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
+                                  (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
                                   dstFormat);
                 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                     const int16_t *lumBuf = lumSrcPtr[0];
-                    const int16_t *chrBuf= chrSrcPtr[0];
+                    const int16_t *chrUBuf= chrUSrcPtr[0];
+                    const int16_t *chrVBuf= chrVSrcPtr[0];
                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
-                    c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
+                    c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
+                                uDest, vDest, aDest, dstW, chrDstW);
                 } else { //General YV12
                     c->yuv2yuvX(c,
                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
+                                chrVSrcPtr, vChrFilterSize,
                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                 }
             } else {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
+                assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
+                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                     int chrAlpha= vChrFilter[2*dstY+1];
                     if(flags & SWS_FULL_CHR_H_INT) {
                         yuv2rgbXinC_full(c, //FIXME write a packed1_full function
                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
+                                         chrVSrcPtr, vChrFilterSize,
                                          alpSrcPtr, dest, dstW, dstY);
                     } else {
-                        c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
+                        c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
+                                       *chrVSrcPtr, *(chrVSrcPtr+1),
                                        alpPixBuf ? *alpSrcPtr : NULL,
                                        dest, dstW, chrAlpha, dstFormat, flags, dstY);
                     }
@@ -735,10 +745,11 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                     if(flags & SWS_FULL_CHR_H_INT) {
                         yuv2rgbXinC_full(c, //FIXME write a packed2_full function
                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                          alpSrcPtr, dest, dstW, dstY);
                     } else {
-                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
+                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
+                                       *chrVSrcPtr, *(chrVSrcPtr+1),
                                        alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                        dest, dstW, lumAlpha, chrAlpha, dstY);
                     }
@@ -746,26 +757,27 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                     if(flags & SWS_FULL_CHR_H_INT) {
                         yuv2rgbXinC_full(c,
                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                          alpSrcPtr, dest, dstW, dstY);
                     } else {
                         c->yuv2packedX(c,
                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                       vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                       vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                        alpSrcPtr, dest, dstW, dstY);
                     }
                 }
             }
         } else { // hmm looks like we can't use MMX here without overwriting this array's tail
             const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
             if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
                 yuv2nv12XinC(
                              vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                              dest, uDest, dstW, chrDstW, dstFormat);
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
@@ -773,27 +785,27 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                 if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
                     yuv2yuvX16inC(
                                   vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                   alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
                                   dstFormat);
                 } else {
                     yuv2yuvXinC(
                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                 }
             } else {
                 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
+                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                 if(flags & SWS_FULL_CHR_H_INT) {
                     yuv2rgbXinC_full(c,
                                      vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                     vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                     vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                      alpSrcPtr, dest, dstW, dstY);
                 } else {
                     yuv2packedXinC(c,
                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                   vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                                   vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                    alpSrcPtr, dest, dstW, dstY);
                 }
             }
diff --git a/libswscale/utils.c b/libswscale/utils.c
index b0548dcf8e..20cc3f187a 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -753,6 +753,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     int srcH= c->srcH;
     int dstW= c->dstW;
     int dstH= c->dstH;
+    int dst_stride = FFALIGN(dstW * sizeof(int16_t)+66, 16), dst_stride_px = dst_stride >> 1;
     int flags, cpu_flags;
     enum PixelFormat srcFormat= c->srcFormat;
     enum PixelFormat dstFormat= c->dstFormat;
@@ -794,10 +795,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
                srcW, srcH, dstW, dstH);
         return AVERROR(EINVAL);
     }
-    if(srcW > VOFW || dstW > VOFW) {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
-        return AVERROR(EINVAL);
-    }
     FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
 
     if (!dstFilter) dstFilter= &dummyFilter;
@@ -1001,29 +998,31 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     // allocate pixbufs (we use dynamic allocation because otherwise we would need to
     // allocate several megabytes to handle all possible cases)
     FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail);
-    FF_ALLOC_OR_GOTO(c, c->chrPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail);
+    FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail);
+    FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail);
     if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat))
         FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail);
     //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
     /* align at 16 bytes for AltiVec */
     for (i=0; i<c->vLumBufSize; i++) {
-        FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], VOF+1, fail);
+        FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+1, fail);
         c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
     }
+    c->uv_off = dst_stride_px;
     for (i=0; i<c->vChrBufSize; i++) {
-        FF_ALLOC_OR_GOTO(c, c->chrPixBuf[i+c->vChrBufSize], (VOF+1)*2, fail);
-        c->chrPixBuf[i] = c->chrPixBuf[i+c->vChrBufSize];
+        FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+1, fail);
+        c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize];
+        c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] + dst_stride_px;
     }
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
         for (i=0; i<c->vLumBufSize; i++) {
-            FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i+c->vLumBufSize], VOF+1, fail);
+            FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i+c->vLumBufSize], dst_stride+1, fail);
             c->alpPixBuf[i] = c->alpPixBuf[i+c->vLumBufSize];
         }
 
     //try to avoid drawing green stuff between the right end and the stride end
-    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);
-
-    assert(2*VOFW == VOF);
+    for (i=0; i<c->vChrBufSize; i++)
+        memset(c->chrUPixBuf[i], 64, dst_stride*2+1);
 
     assert(c->chrDstH <= dstH);
 
@@ -1481,10 +1480,11 @@ void sws_freeContext(SwsContext *c)
         av_freep(&c->lumPixBuf);
     }
 
-    if (c->chrPixBuf) {
+    if (c->chrUPixBuf) {
         for (i=0; i<c->vChrBufSize; i++)
-            av_freep(&c->chrPixBuf[i]);
-        av_freep(&c->chrPixBuf);
+            av_freep(&c->chrUPixBuf[i]);
+        av_freep(&c->chrUPixBuf);
+        av_freep(&c->chrVPixBuf);
     }
 
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 4181cce9b5..0e8c381c04 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -37,9 +37,8 @@
 #endif
 #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
 
-#define YSCALEYUV2YV12X(x, offset, dest, width) \
+#define YSCALEYUV2YV12X(offset, dest, end, pos) \
     __asm__ volatile(\
-        "xor                          %%"REG_a", %%"REG_a"  \n\t"\
         "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
         "movq                             %%mm3, %%mm4      \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
@@ -47,8 +46,8 @@
         ".p2align                             4             \n\t" /* FIXME Unroll? */\
         "1:                                                 \n\t"\
         "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
-        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
-        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm5      \n\t" /* srcData */\
+        "movq                (%%"REG_S", %3, 2), %%mm2      \n\t" /* srcData */\
+        "movq               8(%%"REG_S", %3, 2), %%mm5      \n\t" /* srcData */\
         "add                                $16, %%"REG_d"  \n\t"\
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         "test                         %%"REG_S", %%"REG_S"  \n\t"\
@@ -61,40 +60,40 @@
         "psraw                               $3, %%mm4      \n\t"\
         "packuswb                         %%mm4, %%mm3      \n\t"\
         MOVNTQ(%%mm3, (%1, %%REGa))\
-        "add                                 $8, %%"REG_a"  \n\t"\
-        "cmp                                 %2, %%"REG_a"  \n\t"\
+        "add                                 $8, %3         \n\t"\
+        "cmp                                 %2, %3         \n\t"\
         "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
         "movq                             %%mm3, %%mm4      \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         "jb                                  1b             \n\t"\
         :: "r" (&c->redDither),\
-        "r" (dest), "g" ((x86_reg)width)\
-        : "%"REG_a, "%"REG_d, "%"REG_S\
+           "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
+        : "%"REG_d, "%"REG_S\
     );
 
 static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                     const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrSrc,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc,
                                     int chrFilterSize, const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                                     uint8_t *aDest, long dstW, long chrDstW)
 {
     if (uDest) {
-        YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-        YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
+        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off)
     }
     if (CONFIG_SWSCALE_ALPHA && aDest) {
-        YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
+        YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
     }
 
-    YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+    YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
 }
 
-#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
+#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
     __asm__ volatile(\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-        "xor                          %%"REG_a", %%"REG_a"  \n\t"\
         "pxor                             %%mm4, %%mm4      \n\t"\
         "pxor                             %%mm5, %%mm5      \n\t"\
         "pxor                             %%mm6, %%mm6      \n\t"\
@@ -102,10 +101,10 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         ".p2align                             4             \n\t"\
         "1:                                                 \n\t"\
-        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
-        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
+        "movq                (%%"REG_S", %3, 2), %%mm0      \n\t" /* srcData */\
+        "movq               8(%%"REG_S", %3, 2), %%mm2      \n\t" /* srcData */\
         "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t"\
-        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
+        "movq                (%%"REG_S", %3, 2), %%mm1      \n\t" /* srcData */\
         "movq                             %%mm0, %%mm3      \n\t"\
         "punpcklwd                        %%mm1, %%mm0      \n\t"\
         "punpckhwd                        %%mm1, %%mm3      \n\t"\
@@ -114,7 +113,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "pmaddwd                          %%mm1, %%mm3      \n\t"\
         "paddd                            %%mm0, %%mm4      \n\t"\
         "paddd                            %%mm3, %%mm5      \n\t"\
-        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
+        "movq               8(%%"REG_S", %3, 2), %%mm3      \n\t" /* srcData */\
         "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t"\
         "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
         "test                         %%"REG_S", %%"REG_S"  \n\t"\
@@ -139,8 +138,8 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "psraw                               $3, %%mm6      \n\t"\
         "packuswb                         %%mm6, %%mm4      \n\t"\
         MOVNTQ(%%mm4, (%1, %%REGa))\
-        "add                                 $8, %%"REG_a"  \n\t"\
-        "cmp                                 %2, %%"REG_a"  \n\t"\
+        "add                                 $8, %3         \n\t"\
+        "cmp                                 %2, %3         \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
         "pxor                             %%mm4, %%mm4      \n\t"\
         "pxor                             %%mm5, %%mm5      \n\t"\
@@ -149,26 +148,27 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         "jb                                  1b             \n\t"\
         :: "r" (&c->redDither),\
-        "r" (dest), "g" ((x86_reg)width)\
+        "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
         : "%"REG_a, "%"REG_d, "%"REG_S\
     );
 
 static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                                       const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
                                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                                        uint8_t *aDest, long dstW, long chrDstW)
 {
     if (uDest) {
-        YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-        YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
+        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off)
     }
     if (CONFIG_SWSCALE_ALPHA && aDest) {
-        YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
+        YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
     }
 
-    YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+    YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
 }
 
 #define YSCALEYUV2YV121 \
@@ -185,12 +185,13 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
     "jnc                   1b             \n\t"
 
 static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
-                                    const int16_t *chrSrc, const int16_t *alpSrc,
+                                    const int16_t *chrUSrc, const int16_t *chrVSrc,
+                                    const int16_t *alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                                     uint8_t *aDest, long dstW, long chrDstW)
 {
     long p= 4;
-    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW };
+    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
 
@@ -225,12 +226,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
     "jnc                   1b             \n\t"
 
 static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
-                                       const int16_t *chrSrc, const int16_t *alpSrc,
+                                       const int16_t *chrUSrc, const int16_t *chrVSrc,
+                                       const int16_t *alpSrc,
                                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
                                        uint8_t *aDest, long dstW, long chrDstW)
 {
     long p= 4;
-    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW };
+    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
 
@@ -260,7 +262,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
         "2:                                             \n\t"\
         "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
         "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
-        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
+        "add                          %6, %%"REG_S"     \n\t" \
+        "movq     (%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
         "add                         $16, %%"REG_d"     \n\t"\
         "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
         "pmulhw                    %%mm0, %%mm2         \n\t"\
@@ -296,7 +299,7 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
 #define YSCALEYUV2PACKEDX_END                     \
         :: "r" (&c->redDither),                   \
             "m" (dummy), "m" (dummy), "m" (dummy),\
-            "r" (dest), "m" (dstW_reg)            \
+            "r" (dest), "m" (dstW_reg), "m"(uv_off) \
         : "%"REG_a, "%"REG_d, "%"REG_S            \
     );
 
@@ -315,7 +318,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
         ".p2align                      4                \n\t"\
         "2:                                             \n\t"\
         "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
-        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
+        "add                          %6, %%"REG_S"      \n\t" \
+        "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
         "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
         "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
         "movq                      %%mm0, %%mm3         \n\t"\
@@ -326,7 +330,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
         "pmaddwd                   %%mm1, %%mm3         \n\t"\
         "paddd                     %%mm0, %%mm4         \n\t"\
         "paddd                     %%mm3, %%mm5         \n\t"\
-        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
+        "add                          %6, %%"REG_S"      \n\t" \
+        "movq     (%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
         "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
         "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
         "test                  %%"REG_S", %%"REG_S"     \n\t"\
@@ -461,12 +466,14 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
 
 static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                           const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc,
+                                          const int16_t *chrFilter, const int16_t **chrUSrc,
+                                          const int16_t **chrVSrc,
                                           int chrFilterSize, const int16_t **alpSrc,
                                           uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
         YSCALEYUV2PACKEDX_ACCURATE
@@ -492,12 +499,14 @@ static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilte
 
 static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                                       const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
                                        uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
         YSCALEYUV2PACKEDX
@@ -547,12 +556,14 @@ static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
 
 static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                            const int16_t **lumSrc, int lumFilterSize,
-                                           const int16_t *chrFilter, const int16_t **chrSrc,
+                                           const int16_t *chrFilter, const int16_t **chrUSrc,
+                                           const int16_t **chrVSrc,
                                            int chrFilterSize, const int16_t **alpSrc,
                                            uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -569,12 +580,14 @@ static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilt
 
 static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
                                         const int16_t **lumSrc, int lumFilterSize,
-                                        const int16_t *chrFilter, const int16_t **chrSrc,
+                                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                                        const int16_t **chrVSrc,
                                         int chrFilterSize, const int16_t **alpSrc,
                                         uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -620,12 +633,14 @@ static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
 
 static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                            const int16_t **lumSrc, int lumFilterSize,
-                                           const int16_t *chrFilter, const int16_t **chrSrc,
+                                           const int16_t *chrFilter, const int16_t **chrUSrc,
+                                           const int16_t **chrVSrc,
                                            int chrFilterSize, const int16_t **alpSrc,
                                            uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -642,12 +657,14 @@ static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilt
 
 static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
                                         const int16_t **lumSrc, int lumFilterSize,
-                                        const int16_t *chrFilter, const int16_t **chrSrc,
+                                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                                        const int16_t **chrVSrc,
                                         int chrFilterSize, const int16_t **alpSrc,
                                         uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -773,12 +790,14 @@ static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
 
 static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                           const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrSrc,
+                                          const int16_t *chrFilter, const int16_t **chrUSrc,
+                                          const int16_t **chrVSrc,
                                           int chrFilterSize, const int16_t **alpSrc,
                                           uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -788,19 +807,21 @@ static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilte
     WRITEBGR24(%%REGc, %5, %%REGa)
     :: "r" (&c->redDither),
        "m" (dummy), "m" (dummy), "m" (dummy),
-       "r" (dest), "m" (dstW_reg)
+       "r" (dest), "m" (dstW_reg), "m"(uv_off)
     : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
     );
 }
 
 static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrSrc,
+                                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                                       const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
                                        uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -810,7 +831,7 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
     WRITEBGR24(%%REGc, %5, %%REGa)
     :: "r" (&c->redDither),
        "m" (dummy), "m" (dummy), "m" (dummy),
-       "r" (dest),  "m" (dstW_reg)
+       "r" (dest),  "m" (dstW_reg), "m"(uv_off)
     : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
     );
 }
@@ -832,15 +853,16 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
     " jb          1b            \n\t"
 #define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
 
-
 static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                             const int16_t **lumSrc, int lumFilterSize,
-                                            const int16_t *chrFilter, const int16_t **chrSrc,
+                                            const int16_t *chrFilter, const int16_t **chrUSrc,
+                                            const int16_t **chrVSrc,
                                             int chrFilterSize, const int16_t **alpSrc,
                                             uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX_ACCURATE
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -854,12 +876,14 @@ static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFil
 
 static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
                                          const int16_t **lumSrc, int lumFilterSize,
-                                         const int16_t *chrFilter, const int16_t **chrSrc,
+                                         const int16_t *chrFilter, const int16_t **chrUSrc,
+                                         const int16_t **chrVSrc,
                                          int chrFilterSize, const int16_t **alpSrc,
                                          uint8_t *dest, long dstW, long dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_off << 1;
 
     YSCALEYUV2PACKEDX
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -871,14 +895,16 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
     YSCALEYUV2PACKEDX_END
 }
 
-#define REAL_YSCALEYUV2RGB_UV(index, c) \
+#define REAL_YSCALEYUV2RGB_UV(index, c, uv_off) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
     "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@@ -941,8 +967,8 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
 
 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 
-#define YSCALEYUV2RGB(index, c) \
-    REAL_YSCALEYUV2RGB_UV(index, c) \
+#define YSCALEYUV2RGB(index, c, uv_off) \
+    REAL_YSCALEYUV2RGB_UV(index, c, uv_off) \
     REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
     REAL_YSCALEYUV2RGB_COEFF(c)
 
@@ -950,23 +976,26 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
  * vertical bilinear scale YV12 to RGB
  */
 static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *buf1, const uint16_t *uvbuf0,
-                                       const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                       const uint16_t *buf1, const uint16_t *ubuf0,
+                                       const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                       const uint16_t *vbuf1, const uint16_t *abuf0,
                                        const uint16_t *abuf1, uint8_t *dest,
                                        int dstW, int yalpha, int uvalpha, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
+
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
         __asm__ volatile(
-            YSCALEYUV2RGB(%%r8, %5)
+            YSCALEYUV2RGB(%%r8, %5, %8)
             YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
             "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
             "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
             "packuswb            %%mm7, %%mm1       \n\t"
             WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
                "a" (&c->redDither),
-               "r" (abuf0), "r" (abuf1)
+               "r" (abuf0), "r" (abuf1), "m"(uv_off)
             : "%r8"
         );
 #else
@@ -976,7 +1005,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5)
+            YSCALEYUV2RGB(%%REGBP, %5, %6)
             "push                   %0              \n\t"
             "push                   %1              \n\t"
             "mov          "U_TEMP"(%5), %0          \n\t"
@@ -990,8 +1019,8 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
 #endif
     } else {
@@ -999,50 +1028,56 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5)
+            YSCALEYUV2RGB(%%REGBP, %5, %6)
             "pcmpeqd %%mm7, %%mm7                   \n\t"
             WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     }
 }
 
 static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *buf1, const uint16_t *uvbuf0,
-                                       const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                       const uint16_t *buf1, const uint16_t *ubuf0,
+                                       const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                       const uint16_t *vbuf1, const uint16_t *abuf0,
                                        const uint16_t *abuf1, uint8_t *dest,
                                        int dstW, int yalpha, int uvalpha, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        YSCALEYUV2RGB(%%REGBP, %5, %6)
         "pxor    %%mm7, %%mm7                   \n\t"
         WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-           "a" (&c->redDither)
+        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+           "a" (&c->redDither), "m"(uv_off)
     );
 }
 
 static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *buf1, const uint16_t *uvbuf0,
-                                        const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                        const uint16_t *buf1, const uint16_t *ubuf0,
+                                        const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                        const uint16_t *vbuf1, const uint16_t *abuf0,
                                         const uint16_t *abuf1, uint8_t *dest,
                                         int dstW, int yalpha, int uvalpha, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        YSCALEYUV2RGB(%%REGBP, %5, %6)
         "pxor    %%mm7, %%mm7                   \n\t"
         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1053,23 +1088,26 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
         WRITERGB15(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-           "a" (&c->redDither)
+        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+           "a" (&c->redDither), "m"(uv_off)
     );
 }
 
 static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *buf1, const uint16_t *uvbuf0,
-                                        const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                        const uint16_t *buf1, const uint16_t *ubuf0,
+                                        const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                        const uint16_t *vbuf1, const uint16_t *abuf0,
                                         const uint16_t *abuf1, uint8_t *dest,
                                         int dstW, int yalpha, int uvalpha, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        YSCALEYUV2RGB(%%REGBP, %5, %6)
         "pxor    %%mm7, %%mm7                   \n\t"
         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1080,12 +1118,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
         WRITERGB16(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-           "a" (&c->redDither)
+        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+           "a" (&c->redDither), "m"(uv_off)
     );
 }
 
-#define REAL_YSCALEYUV2PACKED(index, c) \
+#define REAL_YSCALEYUV2PACKED(index, c, uv_off) \
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
     "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
     "psraw                $3, %%mm0                           \n\t"\
@@ -1097,8 +1135,10 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
     "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@@ -1121,34 +1161,39 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
     "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
     "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
 
-#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
+#define YSCALEYUV2PACKED(index, c, uv_off)  REAL_YSCALEYUV2PACKED(index, c, uv_off)
 
 static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
-                                         const uint16_t *buf1, const uint16_t *uvbuf0,
-                                         const uint16_t *uvbuf1, const uint16_t *abuf0,
+                                         const uint16_t *buf1, const uint16_t *ubuf0,
+                                         const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                         const uint16_t *vbuf1, const uint16_t *abuf0,
                                          const uint16_t *abuf1, uint8_t *dest,
                                          int dstW, int yalpha, int uvalpha, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov %4, %%"REG_b"                        \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2PACKED(%%REGBP, %5)
+        YSCALEYUV2PACKED(%%REGBP, %5, %6)
         WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-           "a" (&c->redDither)
+        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+           "a" (&c->redDither), "m"(uv_off)
     );
 }
 
-#define REAL_YSCALEYUV2RGB1(index, c) \
+#define REAL_YSCALEYUV2RGB1(index, c, uv_off) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
     "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
     "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
@@ -1190,17 +1235,19 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
     "packuswb          %%mm6, %%mm5     \n\t"\
     "packuswb          %%mm3, %%mm4     \n\t"\
 
-#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
+#define YSCALEYUV2RGB1(index, c, uv_off)  REAL_YSCALEYUV2RGB1(index, c, uv_off)
 
 // do vertical chrominance interpolation
-#define REAL_YSCALEYUV2RGB1b(index, c) \
+#define REAL_YSCALEYUV2RGB1b(index, c, uv_off) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
     "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
     "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
@@ -1244,7 +1291,7 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
     "packuswb          %%mm6, %%mm5     \n\t"\
     "packuswb          %%mm3, %%mm4     \n\t"\
 
-#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
+#define YSCALEYUV2RGB1b(index, c, uv_off)  REAL_YSCALEYUV2RGB1b(index, c, uv_off)
 
 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
     "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
@@ -1258,11 +1305,13 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
  * YV12 to RGB without scaling or interpolating
  */
 static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                       const uint16_t *vbuf0, const uint16_t *vbuf1,
                                        const uint16_t *abuf0, uint8_t *dest,
                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
                                        int flags, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1271,26 +1320,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5)
+                YSCALEYUV2RGB1(%%REGBP, %5, %6)
                 YSCALEYUV2RGB1_ALPHA(%%REGBP)
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                   "a" (&c->redDither)
+                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+                   "a" (&c->redDither), "m"(uv_off)
             );
         } else {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5)
+                YSCALEYUV2RGB1(%%REGBP, %5, %6)
                 "pcmpeqd %%mm7, %%mm7                   \n\t"
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                   "a" (&c->redDither)
+                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+                   "a" (&c->redDither), "m"(uv_off)
             );
         }
     } else {
@@ -1299,37 +1348,39 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5)
+                YSCALEYUV2RGB1b(%%REGBP, %5, %6)
                 YSCALEYUV2RGB1_ALPHA(%%REGBP)
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                   "a" (&c->redDither)
+                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+                   "a" (&c->redDither), "m"(uv_off)
             );
         } else {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5)
+                YSCALEYUV2RGB1b(%%REGBP, %5, %6)
                 "pcmpeqd %%mm7, %%mm7                   \n\t"
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                   "a" (&c->redDither)
+                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+                   "a" (&c->redDither), "m"(uv_off)
             );
         }
     }
 }
 
 static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                       const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                       const uint16_t *vbuf0, const uint16_t *vbuf1,
                                        const uint16_t *abuf0, uint8_t *dest,
                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
                                        int flags, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1337,36 +1388,38 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5)
+            YSCALEYUV2RGB1(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5)
+            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     }
 }
 
 static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                        const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                        const uint16_t *vbuf0, const uint16_t *vbuf1,
                                         const uint16_t *abuf0, uint8_t *dest,
                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
                                         int flags, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1374,7 +1427,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5)
+            YSCALEYUV2RGB1(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1385,15 +1438,15 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             WRITERGB15(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5)
+            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1404,18 +1457,20 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             WRITERGB15(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     }
 }
 
 static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                        const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                        const uint16_t *vbuf0, const uint16_t *vbuf1,
                                         const uint16_t *abuf0, uint8_t *dest,
                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
                                         int flags, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1423,7 +1478,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5)
+            YSCALEYUV2RGB1(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1434,15 +1489,15 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             WRITERGB16(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5)
+            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1453,18 +1508,20 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             WRITERGB16(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     }
 }
 
-#define REAL_YSCALEYUV2PACKED1(index, c) \
+#define REAL_YSCALEYUV2PACKED1(index, c, uv_off) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "psraw                $7, %%mm3     \n\t" \
     "psraw                $7, %%mm4     \n\t" \
     "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
@@ -1472,16 +1529,18 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "psraw                $7, %%mm1     \n\t" \
     "psraw                $7, %%mm7     \n\t" \
 
-#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
+#define YSCALEYUV2PACKED1(index, c, uv_off)  REAL_YSCALEYUV2PACKED1(index, c, uv_off)
 
-#define REAL_YSCALEYUV2PACKED1b(index, c) \
+#define REAL_YSCALEYUV2PACKED1b(index, c, uv_off) \
     "xor "#index", "#index"             \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "add           "#uv_off", "#index"  \n\t" \
+    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "sub           "#uv_off", "#index"  \n\t" \
     "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
     "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
     "psrlw                $8, %%mm3     \n\t" \
@@ -1490,14 +1549,16 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
     "psraw                $7, %%mm1     \n\t" \
     "psraw                $7, %%mm7     \n\t"
-#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
+#define YSCALEYUV2PACKED1b(index, c, uv_off)  REAL_YSCALEYUV2PACKED1b(index, c, uv_off)
 
 static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
-                                         const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+                                         const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                         const uint16_t *vbuf0, const uint16_t *vbuf1,
                                          const uint16_t *abuf0, uint8_t *dest,
                                          int dstW, int uvalpha, enum PixelFormat dstFormat,
                                          int flags, int y)
 {
+    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1505,24 +1566,24 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1(%%REGBP, %5)
+            YSCALEYUV2PACKED1(%%REGBP, %5, %6)
             WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1b(%%REGBP, %5)
+            YSCALEYUV2PACKED1b(%%REGBP, %5, %6)
             WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-               "a" (&c->redDither)
+            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
+               "a" (&c->redDither), "m"(uv_off)
         );
     }
 }
@@ -2229,7 +2290,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
         dst[i] = src[srcW-1]*128;
 }
 
-static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
+static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
                                         long dstWidth, const uint8_t *src1,
                                         const uint8_t *src2, int srcW, int xInc)
 {
@@ -2244,7 +2305,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
 
     __asm__ volatile(
 #if defined(PIC)
-        "mov          %%"REG_b", %6         \n\t"
+        "mov          %%"REG_b", %7         \n\t"
 #endif
         "pxor             %%mm7, %%mm7      \n\t"
         "mov                 %0, %%"REG_c"  \n\t"
@@ -2262,8 +2323,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
         CALL_MMX2_FILTER_CODE
         "xor          %%"REG_a", %%"REG_a"  \n\t" // i
         "mov                 %5, %%"REG_c"  \n\t" // src
-        "mov                 %1, %%"REG_D"  \n\t" // buf1
-        "add              $"AV_STRINGIFY(VOF)", %%"REG_D"  \n\t"
+        "mov                 %6, %%"REG_D"  \n\t" // buf2
         PREFETCH"   (%%"REG_c")             \n\t"
         PREFETCH" 32(%%"REG_c")             \n\t"
         PREFETCH" 64(%%"REG_c")             \n\t"
@@ -2274,10 +2334,10 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
         CALL_MMX2_FILTER_CODE
 
 #if defined(PIC)
-        "mov %6, %%"REG_b"    \n\t"
+        "mov %7, %%"REG_b"    \n\t"
 #endif
-        :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
-           "m" (mmx2FilterCode), "m" (src2)
+        :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
+           "m" (mmx2FilterCode), "m" (src2), "m"(dst2)
 #if defined(PIC)
           ,"m" (ebxsave)
 #endif
@@ -2288,8 +2348,8 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
     );
 
     for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
-        dst[i] = src1[srcW-1]*128;
-        dst[i+VOFW] = src2[srcW-1]*128;
+        dst1[i] = src1[srcW-1]*128;
+        dst2[i] = src2[srcW-1]*128;
     }
 }
 #endif /* COMPILE_TEMPLATE_MMX2 */
@@ -2301,7 +2361,8 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
     const int dstH= c->dstH;
     const int flags= c->flags;
     int16_t **lumPixBuf= c->lumPixBuf;
-    int16_t **chrPixBuf= c->chrPixBuf;
+    int16_t **chrUPixBuf= c->chrUPixBuf;
+    int16_t **chrVPixBuf= c->chrVPixBuf;
     int16_t **alpPixBuf= c->alpPixBuf;
     const int vLumBufSize= c->vLumBufSize;
     const int vChrBufSize= c->vChrBufSize;
@@ -2326,7 +2387,8 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
     c->redDither= ff_dither8[(dstY+1)&1];
     if (dstY < dstH - 2) {
         const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-        const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+        const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+        const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
         const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
         int i;
         if (flags & SWS_ACCURATE_RND) {
@@ -2345,29 +2407,26 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
                 }
             }
             for (i=0; i<vChrFilterSize; i+=2) {
-                *(const void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
-                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
+                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i  ];
+                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
                 chrMmxFilter[s*i+APCK_COEF/4  ]=
                 chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
                            + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
             }
         } else {
             for (i=0; i<vLumFilterSize; i++) {
-                lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
-                lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
+                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
                 lumMmxFilter[4*i+2]=
                 lumMmxFilter[4*i+3]=
                     ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
                 if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                    alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
-                    alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
+                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
                     alpMmxFilter[4*i+2]=
                     alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
                 }
             }
             for (i=0; i<vChrFilterSize; i++) {
-                chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
-                chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
+                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
                 chrMmxFilter[4*i+2]=
                 chrMmxFilter[4*i+3]=
                     ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
@@ -2492,7 +2551,6 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         }
     }
 #endif /* !COMPILE_TEMPLATE_MMX2 */
-
     if(isAnyRGB(c->srcFormat))
         c->hScale16= RENAME(hScale16);
 }

From 39d607e5bbc25ad9629683702b510e865434ef21 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 27 May 2011 23:50:06 +0200
Subject: [PATCH 438/830] swscale: Commits that could not be pulled earlier due
 to bugs #2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 5a5a0f161359ca6c3fd03eac88a57bd026b8bc1d
Author: Diego Biurrun <diego@biurrun.de>
Date:   Fri May 27 19:46:39 2011 +0200

    swscale: Remove unused variables in x86 code.

    libswscale/x86/swscale_template.c:2072: warning: unused variable ‘canMMX2BeUsed’
    libswscale/x86/swscale_template.c:2145: warning: unused variable ‘canMMX2BeUsed’
    libswscale/x86/swscale_template.c:2209: warning: unused variable ‘chrVPixBuf’
    libswscale/x86/swscale_template.c:2237: warning: unused variable ‘chrVSrcPtr’

commit 389e2000ebc299b9da24f7e5faf9a68a88f9ee7c
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date:   Fri May 27 12:23:32 2011 -0400

    swscale: delay allocation of formatConvBuffer.

    That means it won't be allocated when not needed. Alongside
    this, it fixes valgrind/fate-detected memory leaks.

commit f327bfa6dcdbce4593213c30a328d8aaf7a4b86b
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date:   Fri May 27 11:36:43 2011 -0400

    swscale: fix build with --disable-swscale-alpha.

commit 9f5d45025e8df9d5f39832caad16b94cb6ac11c5
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date:   Fri May 27 09:28:38 2011 -0400

    swscale: fix non-bitexact yuv2yuv[X2]() MMX/MMX2 functions.
---
 libswscale/utils.c                |  2 +-
 libswscale/x86/swscale_template.c | 14 ++++++--------
 libswscale/x86/yuv2rgb_mmx.c      |  8 ++++----
 3 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/libswscale/utils.c b/libswscale/utils.c
index 20cc3f187a..96d6efbb8c 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -795,7 +795,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
                srcW, srcH, dstW, dstH);
         return AVERROR(EINVAL);
     }
-    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
 
     if (!dstFilter) dstFilter= &dummyFilter;
     if (!srcFilter) srcFilter= &dummyFilter;
@@ -851,6 +850,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         }
     }
 
+    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
     if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
         c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
         if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 0e8c381c04..bb35693cf9 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -59,7 +59,7 @@
         "psraw                               $3, %%mm3      \n\t"\
         "psraw                               $3, %%mm4      \n\t"\
         "packuswb                         %%mm4, %%mm3      \n\t"\
-        MOVNTQ(%%mm3, (%1, %%REGa))\
+        MOVNTQ(%%mm3, (%1, %3))\
         "add                                 $8, %3         \n\t"\
         "cmp                                 %2, %3         \n\t"\
         "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
@@ -81,8 +81,9 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                     uint8_t *aDest, long dstW, long chrDstW)
 {
     if (uDest) {
+        x86_reg uv_off = c->uv_off;
         YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
-        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off)
+        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
     }
     if (CONFIG_SWSCALE_ALPHA && aDest) {
         YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
@@ -137,7 +138,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "psraw                               $3, %%mm4      \n\t"\
         "psraw                               $3, %%mm6      \n\t"\
         "packuswb                         %%mm6, %%mm4      \n\t"\
-        MOVNTQ(%%mm4, (%1, %%REGa))\
+        MOVNTQ(%%mm4, (%1, %3))\
         "add                                 $8, %3         \n\t"\
         "cmp                                 %2, %3         \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
@@ -161,8 +162,9 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                        uint8_t *aDest, long dstW, long chrDstW)
 {
     if (uDest) {
+        x86_reg uv_off = c->uv_off;
         YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
-        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off)
+        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
     }
     if (CONFIG_SWSCALE_ALPHA && aDest) {
         YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
@@ -2223,7 +2225,6 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
 {
     int32_t *filterPos = c->hLumFilterPos;
     int16_t *filter    = c->hLumFilter;
-    int     canMMX2BeUsed  = c->canMMX2BeUsed;
     void    *mmx2FilterCode= c->lumMmx2FilterCode;
     int i;
 #if defined(PIC)
@@ -2296,7 +2297,6 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d
 {
     int32_t *filterPos = c->hChrFilterPos;
     int16_t *filter    = c->hChrFilter;
-    int     canMMX2BeUsed  = c->canMMX2BeUsed;
     void    *mmx2FilterCode= c->chrMmx2FilterCode;
     int i;
 #if defined(PIC)
@@ -2362,7 +2362,6 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
     const int flags= c->flags;
     int16_t **lumPixBuf= c->lumPixBuf;
     int16_t **chrUPixBuf= c->chrUPixBuf;
-    int16_t **chrVPixBuf= c->chrVPixBuf;
     int16_t **alpPixBuf= c->alpPixBuf;
     const int vLumBufSize= c->vLumBufSize;
     const int vChrBufSize= c->vChrBufSize;
@@ -2388,7 +2387,6 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
     if (dstY < dstH - 2) {
         const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
         const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-        const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
         const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
         int i;
         if (flags & SWS_ACCURATE_RND) {
diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c
index 855a82506e..d46d5126da 100644
--- a/libswscale/x86/yuv2rgb_mmx.c
+++ b/libswscale/x86/yuv2rgb_mmx.c
@@ -82,15 +82,15 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
     if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
         switch (c->dstFormat) {
             case PIX_FMT_RGB32:
-                if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
-#if HAVE_7REGS
+                if (c->srcFormat == PIX_FMT_YUVA420P) {
+#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
                     return yuva420_rgb32_MMX;
 #endif
                     break;
                 } else return yuv420_rgb32_MMX;
             case PIX_FMT_BGR32:
-                if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
-#if HAVE_7REGS
+                if (c->srcFormat == PIX_FMT_YUVA420P) {
+#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
                     return yuva420_bgr32_MMX;
 #endif
                     break;

From 42761122779dea2629cfc54a8bbe5d327e729c6e Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 28 May 2011 17:20:49 +0100
Subject: [PATCH 439/830] vp8: use av_clip_uintp2() where possible

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/vp8.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 38f38b7cb3..5500706494 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -166,12 +166,12 @@ static void get_quants(VP8Context *s)
         } else
             base_qi = yac_qi;
 
-        s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
-        s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip(base_qi             , 0, 127)];
-        s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
-        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
-        s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
-        s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];
+        s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
+        s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
+        s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
+        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
+        s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
+        s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
 
         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);

From 798b26467877007418d21722d59c769d17ed7233 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 28 May 2011 13:10:01 +0100
Subject: [PATCH 440/830] build: remove BUILD_ROOT variable

This variable is unnecessary as absolute paths are not required.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile   |  6 +++---
 common.mak | 12 +++---------
 configure  |  3 +--
 3 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/Makefile b/Makefile
index 73ada9a59d..d18a2f97cf 100644
--- a/Makefile
+++ b/Makefile
@@ -202,15 +202,15 @@ ffservertest: ffserver$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw
 
 tests/vsynth1/00.pgm: tests/videogen$(HOSTEXESUF)
 	@mkdir -p tests/vsynth1
-	$(M)$(BUILD_ROOT)/$< 'tests/vsynth1/'
+	$(M)./$< 'tests/vsynth1/'
 
 tests/vsynth2/00.pgm: tests/rotozoom$(HOSTEXESUF)
 	@mkdir -p tests/vsynth2
-	$(M)$(BUILD_ROOT)/$< 'tests/vsynth2/' $(SRC_PATH)/tests/lena.pnm
+	$(M)./$< 'tests/vsynth2/' $(SRC_PATH)/tests/lena.pnm
 
 tests/data/asynth1.sw: tests/audiogen$(HOSTEXESUF)
 	@mkdir -p tests/data
-	$(M)$(BUILD_ROOT)/$< $@
+	$(M)./$< $@
 
 tests/data/asynth1.sw tests/vsynth%/00.pgm: TAG = GEN
 
diff --git a/common.mak b/common.mak
index 03fae6f805..20876c0951 100644
--- a/common.mak
+++ b/common.mak
@@ -12,12 +12,6 @@ vpath %.S   $(SRC_DIR)
 vpath %.asm $(SRC_DIR)
 vpath %.v   $(SRC_DIR)
 
-ifeq ($(SRC_DIR),$(SRC_PATH_BARE))
-BUILD_ROOT_REL = .
-else
-BUILD_ROOT_REL = ..
-endif
-
 ifndef V
 Q      = @
 ECHO   = printf "$(1)\t%s\n" $(2)
@@ -33,7 +27,7 @@ endif
 
 ALLFFLIBS = avcodec avdevice avfilter avformat avutil postproc swscale
 
-IFLAGS   := -I$(BUILD_ROOT_REL) -I$(SRC_PATH)
+IFLAGS   := -I. -I$(SRC_PATH)
 CPPFLAGS := $(IFLAGS) $(CPPFLAGS)
 CFLAGS   += $(ECFLAGS)
 YASMFLAGS += $(IFLAGS) -Pconfig.asm
@@ -85,7 +79,7 @@ FFLIBS    := $(FFLIBS-yes) $(FFLIBS)
 TESTPROGS += $(TESTPROGS-yes)
 
 FFEXTRALIBS := $(addprefix -l,$(addsuffix $(BUILDSUF),$(FFLIBS))) $(EXTRALIBS)
-FFLDFLAGS   := $(addprefix -L$(BUILD_ROOT)/lib,$(ALLFFLIBS)) $(LDFLAGS)
+FFLDFLAGS   := $(addprefix -Llib,$(ALLFFLIBS)) $(LDFLAGS)
 
 EXAMPLES  := $(addprefix $(SUBDIR),$(addsuffix -example$(EXESUF),$(EXAMPLES)))
 OBJS      := $(addprefix $(SUBDIR),$(sort $(OBJS)))
@@ -94,7 +88,7 @@ TESTPROGS := $(addprefix $(SUBDIR),$(addsuffix -test$(EXESUF),$(TESTPROGS)))
 HOSTOBJS  := $(addprefix $(SUBDIR),$(addsuffix .o,$(HOSTPROGS)))
 HOSTPROGS := $(addprefix $(SUBDIR),$(addsuffix $(HOSTEXESUF),$(HOSTPROGS)))
 
-DEP_LIBS := $(foreach NAME,$(FFLIBS),$(BUILD_ROOT_REL)/lib$(NAME)/$($(CONFIG_SHARED:yes=S)LIBNAME))
+DEP_LIBS := $(foreach NAME,$(FFLIBS),lib$(NAME)/$($(CONFIG_SHARED:yes=S)LIBNAME))
 
 ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR)/$(ARCH)/*.h))
 SKIPHEADERS += $(addprefix $(ARCH)/,$(ARCH_HEADERS))
diff --git a/configure b/configure
index b535920cb2..805e4ba50a 100755
--- a/configure
+++ b/configure
@@ -2989,7 +2989,7 @@ enabled extra_warnings && check_cflags -Winline
 
 # add some linker flags
 check_ldflags -Wl,--warn-common
-check_ldflags '-Wl,-rpath-link,\$(BUILD_ROOT)/libpostproc -Wl,-rpath-link,\$(BUILD_ROOT)/libswscale -Wl,-rpath-link,\$(BUILD_ROOT)/libavfilter -Wl,-rpath-link,\$(BUILD_ROOT)/libavdevice -Wl,-rpath-link,\$(BUILD_ROOT)/libavformat -Wl,-rpath-link,\$(BUILD_ROOT)/libavcodec -Wl,-rpath-link,\$(BUILD_ROOT)/libavutil'
+check_ldflags -Wl,-rpath-link,libpostproc -Wl,-rpath-link,libswscale -Wl,-rpath-link,libavfilter -Wl,-rpath-link,libavdevice -Wl,-rpath-link,libavformat -Wl,-rpath-link,libavcodec -Wl,-rpath-link,libavutil
 test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic
 
 echo "X{};" > $TMPV
@@ -3253,7 +3253,6 @@ DATADIR=\$(DESTDIR)$datadir
 MANDIR=\$(DESTDIR)$mandir
 SRC_PATH="$source_path"
 SRC_PATH_BARE=$source_path
-BUILD_ROOT="$PWD"
 CC_IDENT=$cc_ident
 ARCH=$arch
 CC=$cc

From 2146f4928a45b37eba781a74aa8cc9ae50e89d52 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Fri, 27 May 2011 20:22:55 +0200
Subject: [PATCH 441/830] vf_crop: Replace #ifdef DEBUG + av_log() by
 av_dlog().

---
 libavfilter/vf_crop.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/libavfilter/vf_crop.c b/libavfilter/vf_crop.c
index 9f71b9e7dc..69e5a520c8 100644
--- a/libavfilter/vf_crop.c
+++ b/libavfilter/vf_crop.c
@@ -264,11 +264,9 @@ static void start_frame(AVFilterLink *link, AVFilterBufferRef *picref)
     crop->x &= ~((1 << crop->hsub) - 1);
     crop->y &= ~((1 << crop->vsub) - 1);
 
-#ifdef DEBUG
-    av_log(ctx, AV_LOG_DEBUG,
-           "n:%d t:%f x:%d y:%d x+w:%d y+h:%d\n",
-           (int)crop->var_values[VAR_N], crop->var_values[VAR_T], crop->x, crop->y, crop->x+crop->w, crop->y+crop->h);
-#endif
+    av_dlog(ctx, "n:%d t:%f x:%d y:%d x+w:%d y+h:%d\n",
+            (int)crop->var_values[VAR_N], crop->var_values[VAR_T], crop->x,
+            crop->y, crop->x+crop->w, crop->y+crop->h);
 
     ref2->data[0] += crop->y * ref2->linesize[0];
     ref2->data[0] += crop->x * crop->max_step[0];

From d6e0729b24de566a80554abbdf0aca1ed24de14b Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Fri, 27 May 2011 20:24:44 +0200
Subject: [PATCH 442/830] avfilter: Surround function only used in debug mode
 by appropriate #ifdef.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the warning:
libavfilter/avfilter.c:219: warning: ‘ff_get_ref_perms_string’ defined but not used
---
 libavfilter/avfilter.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index 02915036ab..abeae14f79 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -215,6 +215,7 @@ int avfilter_config_links(AVFilterContext *filter)
     return 0;
 }
 
+#ifdef DEBUG
 static char *ff_get_ref_perms_string(char *buf, size_t buf_size, int perms)
 {
     snprintf(buf, buf_size, "%s%s%s%s%s%s",
@@ -226,6 +227,7 @@ static char *ff_get_ref_perms_string(char *buf, size_t buf_size, int perms)
              perms & AV_PERM_NEG_LINESIZES ? "n" : "");
     return buf;
 }
+#endif
 
 static void ff_dlog_ref(void *ctx, AVFilterBufferRef *ref, int end)
 {

From 06b5facd4b8cb19da352cec71b2edf2f0343ee35 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Fri, 29 Apr 2011 19:35:08 +0200
Subject: [PATCH 443/830] swscale: Remove disabled code.

---
 libswscale/ppc/yuv2rgb_altivec.c | 152 -------------------------------
 libswscale/rgb2rgb_template.c    |   7 --
 libswscale/yuv2rgb.c             |  74 ---------------
 3 files changed, 233 deletions(-)

diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 0cc0d3084d..54ebee1edf 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -447,159 +447,7 @@ static int altivec_##name (SwsContext *c,                               \
 #define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)
 
 DEFCSP420_CVT (yuv2_abgr, out_abgr)
-#if 1
 DEFCSP420_CVT (yuv2_bgra, out_bgra)
-#else
-static int altivec_yuv2_bgra32 (SwsContext *c,
-                                unsigned char **in, int *instrides,
-                                int srcSliceY,        int srcSliceH,
-                                unsigned char **oplanes, int *outstrides)
-{
-    int w = c->srcW;
-    int h = srcSliceH;
-    int i,j;
-    int instrides_scl[3];
-    vector unsigned char y0,y1;
-
-    vector signed char  u,v;
-
-    vector signed short Y0,Y1,Y2,Y3;
-    vector signed short U,V;
-    vector signed short vx,ux,uvx;
-    vector signed short vx0,ux0,uvx0;
-    vector signed short vx1,ux1,uvx1;
-    vector signed short R0,G0,B0;
-    vector signed short R1,G1,B1;
-    vector unsigned char R,G,B;
-
-    vector unsigned char *uivP, *vivP;
-    vector unsigned char align_perm;
-
-    vector signed short
-        lCY  = c->CY,
-        lOY  = c->OY,
-        lCRV = c->CRV,
-        lCBU = c->CBU,
-        lCGU = c->CGU,
-        lCGV = c->CGV;
-
-    vector unsigned short lCSHIFT = c->CSHIFT;
-
-    ubyte *y1i   = in[0];
-    ubyte *y2i   = in[0]+w;
-    ubyte *ui    = in[1];
-    ubyte *vi    = in[2];
-
-    vector unsigned char *oute
-        = (vector unsigned char *)
-          (oplanes[0]+srcSliceY*outstrides[0]);
-    vector unsigned char *outo
-        = (vector unsigned char *)
-          (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);
-
-
-    instrides_scl[0] = instrides[0];
-    instrides_scl[1] = instrides[1]-w/2;  /* the loop moves ui by w/2 */
-    instrides_scl[2] = instrides[2]-w/2;  /* the loop moves vi by w/2 */
-
-
-    for (i=0;i<h/2;i++) {
-        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);
-        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);
-
-        for (j=0;j<w/16;j++) {
-
-            y0 = vec_ldl (0,y1i);
-            y1 = vec_ldl (0,y2i);
-            uivP = (vector unsigned char *)ui;
-            vivP = (vector unsigned char *)vi;
-
-            align_perm = vec_lvsl (0, ui);
-            u  = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm);
-
-            align_perm = vec_lvsl (0, vi);
-            v  = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm);
-            u  = (vector signed char)
-                 vec_sub (u,(vector signed char)
-                          vec_splat((vector signed char){128},0));
-
-            v  = (vector signed char)
-                 vec_sub (v, (vector signed char)
-                          vec_splat((vector signed char){128},0));
-
-            U  = vec_unpackh (u);
-            V  = vec_unpackh (v);
-
-
-            Y0 = vec_unh (y0);
-            Y1 = vec_unl (y0);
-            Y2 = vec_unh (y1);
-            Y3 = vec_unl (y1);
-
-            Y0 = vec_mradds (Y0, lCY, lOY);
-            Y1 = vec_mradds (Y1, lCY, lOY);
-            Y2 = vec_mradds (Y2, lCY, lOY);
-            Y3 = vec_mradds (Y3, lCY, lOY);
-
-            /*   ux  = (CBU*(u<<CSHIFT)+0x4000)>>15 */
-            ux = vec_sl (U, lCSHIFT);
-            ux = vec_mradds (ux, lCBU, (vector signed short){0});
-            ux0  = vec_mergeh (ux,ux);
-            ux1  = vec_mergel (ux,ux);
-
-            /* vx  = (CRV*(v<<CSHIFT)+0x4000)>>15;        */
-            vx = vec_sl (V, lCSHIFT);
-            vx = vec_mradds (vx, lCRV, (vector signed short){0});
-            vx0  = vec_mergeh (vx,vx);
-            vx1  = vec_mergel (vx,vx);
-            /* uvx = ((CGU*u) + (CGV*v))>>15 */
-            uvx = vec_mradds (U, lCGU, (vector signed short){0});
-            uvx = vec_mradds (V, lCGV, uvx);
-            uvx0 = vec_mergeh (uvx,uvx);
-            uvx1 = vec_mergel (uvx,uvx);
-            R0 = vec_add (Y0,vx0);
-            G0 = vec_add (Y0,uvx0);
-            B0 = vec_add (Y0,ux0);
-            R1 = vec_add (Y1,vx1);
-            G1 = vec_add (Y1,uvx1);
-            B1 = vec_add (Y1,ux1);
-            R  = vec_packclp (R0,R1);
-            G  = vec_packclp (G0,G1);
-            B  = vec_packclp (B0,B1);
-
-            out_argb(R,G,B,oute);
-            R0 = vec_add (Y2,vx0);
-            G0 = vec_add (Y2,uvx0);
-            B0 = vec_add (Y2,ux0);
-            R1 = vec_add (Y3,vx1);
-            G1 = vec_add (Y3,uvx1);
-            B1 = vec_add (Y3,ux1);
-            R  = vec_packclp (R0,R1);
-            G  = vec_packclp (G0,G1);
-            B  = vec_packclp (B0,B1);
-
-            out_argb(R,G,B,outo);
-            y1i  += 16;
-            y2i  += 16;
-            ui   += 8;
-            vi   += 8;
-
-        }
-
-        outo  += (outstrides[0])>>4;
-        oute  += (outstrides[0])>>4;
-
-        ui    += instrides_scl[1];
-        vi    += instrides_scl[2];
-        y1i   += instrides_scl[0];
-        y2i   += instrides_scl[0];
-    }
-    return srcSliceH;
-}
-
-#endif
-
-
 DEFCSP420_CVT (yuv2_rgba, out_rgba)
 DEFCSP420_CVT (yuv2_argb, out_argb)
 DEFCSP420_CVT (yuv2_rgb24,  out_rgb24)
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 0a226a2960..c02015e5b3 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -574,7 +574,6 @@ static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
     }
 
     // last line
-#if 1
     dst[0]= src[0];
 
     for (x=0; x<srcWidth-1; x++) {
@@ -582,12 +581,6 @@ static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
     }
     dst[2*srcWidth-1]= src[srcWidth-1];
-#else
-    for (x=0; x<srcWidth; x++) {
-        dst[2*x+0]=
-        dst[2*x+1]= src[x];
-    }
-#endif
 }
 
 /**
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index a502f654ed..cad09338d3 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -366,28 +366,6 @@ YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0)
     PUTRGB(dst_1,py_1,3);
 CLOSEYUV2RGBFUNC(8)
 
-#if 0 // Currently unused
-// This is exactly the same code as yuv2rgb_c_32 except for the types of
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_8, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTRGB(dst_1,py_1,0);
-    PUTRGB(dst_2,py_2,0);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1);
-    PUTRGB(dst_1,py_1,1);
-
-    LOADCHROMA(2);
-    PUTRGB(dst_1,py_1,2);
-    PUTRGB(dst_2,py_2,2);
-
-    LOADCHROMA(3);
-    PUTRGB(dst_2,py_2,3);
-    PUTRGB(dst_1,py_1,3);
-CLOSEYUV2RGBFUNC(8)
-#endif
-
 // r, g, b, dst_1, dst_2
 YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
     const uint8_t *d16 = dither_4x4_16[y&3];
@@ -441,36 +419,6 @@ YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
     PUTRGB8(dst_1,py_1,3,6);
 CLOSEYUV2RGBFUNC(8)
 
-#if 0 // Currently unused
-// This is exactly the same code as yuv2rgb_c_32 except for the types of
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_4, uint8_t, 0)
-    int acc;
-#define PUTRGB4(dst,src,i)          \
-    Y = src[2*i];                   \
-    acc = r[Y] + g[Y] + b[Y];       \
-    Y = src[2*i+1];                 \
-    acc |= (r[Y] + g[Y] + b[Y])<<4; \
-    dst[i] = acc;
-
-    LOADCHROMA(0);
-    PUTRGB4(dst_1,py_1,0);
-    PUTRGB4(dst_2,py_2,0);
-
-    LOADCHROMA(1);
-    PUTRGB4(dst_2,py_2,1);
-    PUTRGB4(dst_1,py_1,1);
-
-    LOADCHROMA(2);
-    PUTRGB4(dst_1,py_1,2);
-    PUTRGB4(dst_2,py_2,2);
-
-    LOADCHROMA(3);
-    PUTRGB4(dst_2,py_2,3);
-    PUTRGB4(dst_1,py_1,3);
-CLOSEYUV2RGBFUNC(4)
-#endif
-
 YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
     const uint8_t *d64 =  dither_8x8_73[y&7];
     const uint8_t *d128 = dither_8x8_220[y&7];
@@ -500,28 +448,6 @@ YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
     PUTRGB4D(dst_1,py_1,3,6);
 CLOSEYUV2RGBFUNC(4)
 
-#if 0 // Currently unused
-// This is exactly the same code as yuv2rgb_c_32 except for the types of
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTRGB(dst_1,py_1,0);
-    PUTRGB(dst_2,py_2,0);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1);
-    PUTRGB(dst_1,py_1,1);
-
-    LOADCHROMA(2);
-    PUTRGB(dst_1,py_1,2);
-    PUTRGB(dst_2,py_2,2);
-
-    LOADCHROMA(3);
-    PUTRGB(dst_2,py_2,3);
-    PUTRGB(dst_1,py_1,3);
-CLOSEYUV2RGBFUNC(8)
-#endif
-
 YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
     const uint8_t *d64 =  dither_8x8_73[y&7];
     const uint8_t *d128 = dither_8x8_220[y&7];

From 7a70e01b267b499d92fda68724d311c94cd76b26 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 27 May 2011 09:32:17 +0200
Subject: [PATCH 444/830] v4l2: don't leak video standard string on error.

---
 libavdevice/v4l2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index e0186f8031..80b640078d 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -511,7 +511,6 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
             return AVERROR(EIO);
         }
     }
-    av_freep(&s->standard);
 
     if (ap->time_base.num && ap->time_base.den) {
         av_log(s1, AV_LOG_DEBUG, "Setting time per frame to %d/%d\n",
@@ -681,6 +680,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 out:
     av_freep(&s->video_size);
     av_freep(&s->pixel_format);
+    av_freep(&s->standard);
     return res;
 }
 

From 54dc95634d1d15d5d6e3c7c361610c8eec97e75e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Sun, 29 May 2011 15:22:15 +0200
Subject: [PATCH 445/830] Cast pointers to uintptr_t rather than unsigned int.

Avoids potential warnings on PPC64 systems.
---
 libswscale/ppc/swscale_altivec_template.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec_template.c
index 9a93252502..5d4eac5bde 100644
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec_template.c
@@ -29,13 +29,13 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
     register int i;
     vector unsigned int altivec_vectorShiftInt19 =
         vec_add(vec_splat_u32(10), vec_splat_u32(9));
-    if ((unsigned int)dest % 16) {
+    if ((uintptr_t)dest % 16) {
         /* badly aligned store, we force store alignment */
         /* and will handle load misalignment on val w/ vec_perm */
         vector unsigned char perm1;
         vector signed int v1;
         for (i = 0 ; (i < dstW) &&
-            (((unsigned int)dest + i) % 16) ; i++) {
+            (((uintptr_t)dest + i) % 16) ; i++) {
                 int t = val[i] >> 19;
                 dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
         }

From 4596d0f4c448e7cec37818cd3ee3583346083f28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Sun, 29 May 2011 15:23:28 +0200
Subject: [PATCH 446/830] Remove unused variable, avoiding compiler warning.

---
 libswscale/ppc/swscale_template.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
index 20a81e5662..27351adca1 100644
--- a/libswscale/ppc/swscale_template.c
+++ b/libswscale/ppc/swscale_template.c
@@ -66,8 +66,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
 
 static void RENAME(sws_init_swScale)(SwsContext *c)
 {
-    enum PixelFormat srcFormat = c->srcFormat;
-
     c->yuv2yuvX     = RENAME(yuv2yuvX    );
     c->yuv2packedX  = RENAME(yuv2packedX );
 }

From 89c687e97e6544414e11808d99d7a56729b79ef8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Sun, 29 May 2011 15:25:17 +0200
Subject: [PATCH 447/830] Add const to vector types for input in altivec code.

Avoids a large number of warnings about cast discarding qualifiers.
---
 libswscale/ppc/yuv2rgb_altivec.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 06f8df8239..c1115e6abb 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -297,7 +297,7 @@ static int altivec_##name (SwsContext *c,                               \
     vector signed short R1,G1,B1;                                       \
     vector unsigned char R,G,B;                                         \
                                                                         \
-    vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;                  \
+    vector const unsigned char *y1ivP, *y2ivP, *uivP, *vivP;            \
     vector unsigned char align_perm;                                    \
                                                                         \
     vector signed short                                                 \
@@ -334,10 +334,10 @@ static int altivec_##name (SwsContext *c,                               \
                                                                         \
         for (j=0;j<w/16;j++) {                                          \
                                                                         \
-            y1ivP = (vector unsigned char *)y1i;                        \
-            y2ivP = (vector unsigned char *)y2i;                        \
-            uivP  = (vector unsigned char *)ui;                         \
-            vivP  = (vector unsigned char *)vi;                         \
+            y1ivP = (vector const unsigned char *)y1i;                  \
+            y2ivP = (vector const unsigned char *)y2i;                  \
+            uivP  = (vector const unsigned char *)ui;                   \
+            vivP  = (vector const unsigned char *)vi;                   \
                                                                         \
             align_perm = vec_lvsl (0, y1i);                             \
             y0 = (vector unsigned char)                                 \

From bb9b7bc62eb645d127cbf283b8f86fd4b6b3c916 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Sun, 29 May 2011 15:31:20 +0200
Subject: [PATCH 448/830] Remove now unused nb_istreams variable.

---
 ffmpeg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index dc435aba2a..3f4cc2fd04 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1978,7 +1978,7 @@ static int transcode(AVFormatContext **output_files,
                      int nb_input_files,
                      AVStreamMap *stream_maps, int nb_stream_maps)
 {
-    int ret = 0, i, j, k, n, nb_istreams = 0, nb_ostreams = 0,step;
+    int ret = 0, i, j, k, n, nb_ostreams = 0, step;
 
     AVFormatContext *is, *os;
     AVCodecContext *codec, *icodec;

From 371266daa3df35c424203fff0ce2e6de0e33a29d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 29 May 2011 14:22:52 +0100
Subject: [PATCH 449/830] ARM: enable UAL syntax in asm.S

This enables UAL syntax for all asm files instead of only those
which happen to be incompatible with the old, deprecated syntax.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/asm.S                 | 2 ++
 libavcodec/arm/dsputil_vfp.S         | 1 -
 libavcodec/arm/fmtconvert_vfp.S      | 2 --
 libavcodec/arm/mpegvideo_armv5te_s.S | 2 +-
 libavcodec/arm/vp8_armv6.S           | 2 --
 5 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/libavcodec/arm/asm.S b/libavcodec/arm/asm.S
index 6e3d7881dd..8d7fe98614 100644
--- a/libavcodec/arm/asm.S
+++ b/libavcodec/arm/asm.S
@@ -26,6 +26,8 @@
 #   define ELF @
 #endif
 
+        .syntax unified
+
 .macro  require8 val=1
 ELF     .eabi_attribute 24, \val
 .endm
diff --git a/libavcodec/arm/dsputil_vfp.S b/libavcodec/arm/dsputil_vfp.S
index b41db03b63..16ea25afe6 100644
--- a/libavcodec/arm/dsputil_vfp.S
+++ b/libavcodec/arm/dsputil_vfp.S
@@ -21,7 +21,6 @@
 #include "config.h"
 #include "asm.S"
 
-        .syntax unified
 /*
  * VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle
  * throughput for almost all the instructions (except for double precision
diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S
index e01627b669..1bb7f49801 100644
--- a/libavcodec/arm/fmtconvert_vfp.S
+++ b/libavcodec/arm/fmtconvert_vfp.S
@@ -21,8 +21,6 @@
 #include "config.h"
 #include "asm.S"
 
-        .syntax unified
-
 /**
  * ARM VFP optimized float to int16 conversion.
  * Assume that len is a positive number and is multiple of 8, destination
diff --git a/libavcodec/arm/mpegvideo_armv5te_s.S b/libavcodec/arm/mpegvideo_armv5te_s.S
index 6473926c0e..e83fe991e6 100644
--- a/libavcodec/arm/mpegvideo_armv5te_s.S
+++ b/libavcodec/arm/mpegvideo_armv5te_s.S
@@ -95,7 +95,7 @@ function ff_dct_unquantize_h263_armv5te, export=1
         strh            lr, [r0], #2
 
         subs            r3, r3, #8
-        ldrgtd          r4, [r0, #0] /* load data early to avoid load/use pipeline stall */
+        ldrdgt          r4, [r0, #0] /* load data early to avoid load/use pipeline stall */
         bgt             1b
 
         adds            r3, r3, #2
diff --git a/libavcodec/arm/vp8_armv6.S b/libavcodec/arm/vp8_armv6.S
index 11d3521f8d..602c8a58be 100644
--- a/libavcodec/arm/vp8_armv6.S
+++ b/libavcodec/arm/vp8_armv6.S
@@ -20,8 +20,6 @@
 
 #include "asm.S"
 
-        .syntax         unified
-
 .macro rac_get_prob     h, bs, buf, cw, pr, t0, t1
         adds            \bs, \bs, \t0
         lsl             \cw, \cw, \t0

From fd38a15adf7f4e20f25d89f162e4a8fbbd8ec92e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= <ubitux@gmail.com>
Date: Sun, 29 May 2011 21:07:35 +0200
Subject: [PATCH 450/830] Fix various bad printf format warnings

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/mxfdec.c |  8 ++++----
 libavformat/r3d.c    | 12 ++++++------
 libavformat/utils.c  |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
index 6b44b8f3e3..cd78a2aa59 100644
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c
@@ -309,7 +309,7 @@ static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
         if (klv_read_packet(&klv, s->pb) < 0)
             return -1;
         PRINT_KEY(s, "read packet", klv.key);
-        av_dlog(s, "size %lld offset %#llx\n", klv.length, klv.offset);
+        av_dlog(s, "size %"PRIu64" offset %#"PRIx64"\n", klv.length, klv.offset);
         if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key)) {
             int res = mxf_decrypt_triplet(s, pkt, &klv);
             if (res < 0) {
@@ -522,8 +522,8 @@ static int mxf_read_index_table_segment(void *arg, AVIOContext *pb, int tag, int
     case 0x3F06: av_dlog(NULL, "IndexSID %d\n", avio_rb32(pb)); break;
     case 0x3F07: av_dlog(NULL, "BodySID %d\n", avio_rb32(pb)); break;
     case 0x3F0B: av_dlog(NULL, "IndexEditRate %d/%d\n", avio_rb32(pb), avio_rb32(pb)); break;
-    case 0x3F0C: av_dlog(NULL, "IndexStartPosition %lld\n", avio_rb64(pb)); break;
-    case 0x3F0D: av_dlog(NULL, "IndexDuration %lld\n", avio_rb64(pb)); break;
+    case 0x3F0C: av_dlog(NULL, "IndexStartPosition %"PRId64"\n", avio_rb64(pb)); break;
+    case 0x3F0D: av_dlog(NULL, "IndexDuration %"PRId64"\n", avio_rb64(pb)); break;
     }
     return 0;
 }
@@ -920,7 +920,7 @@ static int mxf_read_header(AVFormatContext *s, AVFormatParameters *ap)
         if (klv_read_packet(&klv, s->pb) < 0)
             return -1;
         PRINT_KEY(s, "read header", klv.key);
-        av_dlog(s, "size %lld offset %#llx\n", klv.length, klv.offset);
+        av_dlog(s, "size %"PRId64" offset %#"PRIx64"\n", klv.length, klv.offset);
         if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key) ||
             IS_KLV_KEY(klv.key, mxf_essence_element_key)) {
             /* FIXME avoid seek */
diff --git a/libavformat/r3d.c b/libavformat/r3d.c
index 5e027d158b..543418b233 100644
--- a/libavformat/r3d.c
+++ b/libavformat/r3d.c
@@ -43,7 +43,7 @@ static int read_atom(AVFormatContext *s, Atom *atom)
     if (atom->size < 8)
         return -1;
     atom->tag = avio_rl32(s->pb);
-    av_dlog(s, "atom %d %.4s offset %#llx\n",
+    av_dlog(s, "atom %d %.4s offset %#"PRIx64"\n",
             atom->size, (char*)&atom->tag, atom->offset);
     return atom->size;
 }
@@ -131,7 +131,7 @@ static int r3d_read_rdvo(AVFormatContext *s, Atom *atom)
     if (st->codec->time_base.den)
         st->duration = (uint64_t)r3d->video_offsets_count*
             st->time_base.den*st->codec->time_base.num/st->codec->time_base.den;
-    av_dlog(s, "duration %lld\n", st->duration);
+    av_dlog(s, "duration %"PRId64"\n", st->duration);
 
     return 0;
 }
@@ -176,7 +176,7 @@ static int r3d_read_header(AVFormatContext *s, AVFormatParameters *ap)
     }
 
     s->data_offset = avio_tell(s->pb);
-    av_dlog(s, "data offset %#llx\n", s->data_offset);
+    av_dlog(s, "data offset %#"PRIx64"\n", s->data_offset);
     if (!s->pb->seekable)
         return 0;
     // find REOB/REOF/REOS to load index
@@ -255,7 +255,7 @@ static int r3d_read_redv(AVFormatContext *s, AVPacket *pkt, Atom *atom)
     if (st->codec->time_base.den)
         pkt->duration = (uint64_t)st->time_base.den*
             st->codec->time_base.num/st->codec->time_base.den;
-    av_dlog(s, "pkt dts %lld duration %d\n", pkt->dts, pkt->duration);
+    av_dlog(s, "pkt dts %"PRId64" duration %d\n", pkt->dts, pkt->duration);
 
     return 0;
 }
@@ -299,7 +299,7 @@ static int r3d_read_reda(AVFormatContext *s, AVPacket *pkt, Atom *atom)
     pkt->stream_index = 1;
     pkt->dts = dts;
     pkt->duration = av_rescale(samples, st->time_base.den, st->codec->sample_rate);
-    av_dlog(s, "pkt dts %lld duration %d samples %d sample rate %d\n",
+    av_dlog(s, "pkt dts %"PRId64" duration %d samples %d sample rate %d\n",
             pkt->dts, pkt->duration, samples, st->codec->sample_rate);
 
     return 0;
@@ -356,7 +356,7 @@ static int r3d_seek(AVFormatContext *s, int stream_index, int64_t sample_time, i
 
     frame_num = sample_time*st->codec->time_base.den/
         ((int64_t)st->codec->time_base.num*st->time_base.den);
-    av_dlog(s, "seek frame num %d timestamp %lld\n", frame_num, sample_time);
+    av_dlog(s, "seek frame num %d timestamp %"PRId64"\n", frame_num, sample_time);
 
     if (frame_num < r3d->video_offsets_count) {
         avio_seek(s->pb, r3d->video_offsets_count, SEEK_SET);
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 3432b2269f..f0080bc433 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2334,7 +2334,7 @@ int av_find_stream_info(AVFormatContext *ic)
         if (st->codec_info_nb_frames>1) {
             int64_t t;
             if (st->time_base.den > 0 && (t=av_rescale_q(st->info->codec_info_duration, st->time_base, AV_TIME_BASE_Q)) >= ic->max_analyze_duration) {
-                av_log(ic, AV_LOG_WARNING, "max_analyze_duration %d reached at %Ld\n", ic->max_analyze_duration, t);
+                av_log(ic, AV_LOG_WARNING, "max_analyze_duration %d reached at %"PRId64"\n", ic->max_analyze_duration, t);
                 break;
             }
             st->info->codec_info_duration += pkt->duration;

From adba9c63525b8971fc6ccda47e643dca05c3ee9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= <ubitux@gmail.com>
Date: Sun, 29 May 2011 21:07:34 +0200
Subject: [PATCH 451/830] Fix various unused variable warnings

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffmpeg.c                    |  2 +-
 ffplay.c                    |  4 ++--
 libavcodec/a64multienc.c    |  2 --
 libavcodec/celp_filters.c   |  3 +--
 libavcodec/dca.c            |  5 ++---
 libavcodec/dirac.c          |  2 +-
 libavcodec/rv10.c           |  2 +-
 libavcodec/s302m.c          |  2 +-
 libavcodec/shorten.c        |  2 +-
 libavcodec/sp5xdec.c        |  3 ---
 libavcodec/truemotion2.c    |  3 +--
 libavcodec/vaapi_mpeg2.c    |  2 +-
 libavcodec/vmdav.c          |  2 --
 libavcodec/zmbvenc.c        |  4 ++--
 libavfilter/vf_fieldorder.c |  3 +--
 libavformat/4xm.c           |  2 +-
 libavformat/apetag.c        |  2 +-
 libavformat/asfdec.c        | 10 +++++-----
 libavformat/mmf.c           |  4 +---
 libavformat/mov.c           |  6 +++---
 libavformat/oggdec.c        |  4 ++--
 libavformat/oggparsecelt.c  |  4 ++--
 libavformat/oggparseogm.c   |  2 +-
 libavformat/rl2.c           |  4 ++--
 libavformat/rmdec.c         |  2 +-
 libavformat/rpl.c           |  2 +-
 libavformat/rtpdec_latm.c   |  2 +-
 libavformat/sauce.c         |  2 +-
 libavformat/smacker.c       |  2 --
 libavformat/sol.c           |  2 +-
 libavformat/yuv4mpeg.c      |  4 ++--
 31 files changed, 40 insertions(+), 55 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 3f4cc2fd04..29f6950d08 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -3720,7 +3720,7 @@ static void new_audio_stream(AVFormatContext *oc, int file_idx)
 static void new_data_stream(AVFormatContext *oc, int file_idx)
 {
     AVStream *st;
-    AVOutputStream *ost;
+    AVOutputStream *ost av_unused;
     AVCodec *codec=NULL;
     AVCodecContext *data_enc;
 
diff --git a/ffplay.c b/ffplay.c
index 9b9b665385..ffe1c0f12d 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -1438,7 +1438,7 @@ static int queue_picture(VideoState *is, AVFrame *src_frame, double pts1, int64_
 
 static int get_video_frame(VideoState *is, AVFrame *frame, int64_t *pts, AVPacket *pkt)
 {
-    int len1, got_picture, i;
+    int len1 av_unused, got_picture, i;
 
     if (packet_queue_get(&is->videoq, pkt, 1) < 0)
         return -1;
@@ -1813,7 +1813,7 @@ static int subtitle_thread(void *arg)
     VideoState *is = arg;
     SubPicture *sp;
     AVPacket pkt1, *pkt = &pkt1;
-    int len1, got_subtitle;
+    int len1 av_unused, got_subtitle;
     double pts;
     int i, j;
     int r, g, b, y, u, v, a;
diff --git a/libavcodec/a64multienc.c b/libavcodec/a64multienc.c
index e9b3471925..5a665d0592 100644
--- a/libavcodec/a64multienc.c
+++ b/libavcodec/a64multienc.c
@@ -252,7 +252,6 @@ static int a64multi_encode_frame(AVCodecContext *avctx, unsigned char *buf,
     int b_width;
 
     int req_size;
-    int num_frames   = c->mc_lifetime;
 
     int *charmap     = c->mc_charmap;
     uint8_t *colram  = c->mc_colram;
@@ -280,7 +279,6 @@ static int a64multi_encode_frame(AVCodecContext *avctx, unsigned char *buf,
         if (!c->mc_lifetime) return 0;
         /* no more frames in queue, prepare to flush remaining frames */
         if (!c->mc_frame_counter) {
-            num_frames = c->mc_lifetime;
             c->mc_lifetime = 0;
         }
         /* still frames in queue so limit lifetime to remaining frames */
diff --git a/libavcodec/celp_filters.c b/libavcodec/celp_filters.c
index 26a62eed14..8b68c2ffef 100644
--- a/libavcodec/celp_filters.c
+++ b/libavcodec/celp_filters.c
@@ -109,7 +109,7 @@ void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
     old_out2 = out[-2];
     old_out3 = out[-1];
     for (n = 0; n <= buffer_length - 4; n+=4) {
-        float tmp0,tmp1,tmp2,tmp3;
+        float tmp0,tmp1,tmp2;
         float val;
 
         out0 = in[0];
@@ -160,7 +160,6 @@ void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
         tmp0 = out0;
         tmp1 = out1;
         tmp2 = out2;
-        tmp3 = out3;
 
         out3 -= a * tmp2;
         out2 -= a * tmp1;
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 74bae4e295..074f998995 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -1535,8 +1535,8 @@ static void dca_exss_parse_header(DCAContext *s)
 {
     int ss_index;
     int blownup;
-    int header_size;
-    int hd_size;
+    int header_size av_unused;
+    int hd_size av_unused;
     int num_audiop = 1;
     int num_assets = 1;
     int active_ss_mask[8];
@@ -1622,7 +1622,6 @@ static int dca_decode_frame(AVCodecContext * avctx,
 {
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
-    int data_size_tmp;
 
     int lfe_samples;
     int num_core_channels = 0;
diff --git a/libavcodec/dirac.c b/libavcodec/dirac.c
index c8dc7a0f62..f39e9996d6 100644
--- a/libavcodec/dirac.c
+++ b/libavcodec/dirac.c
@@ -245,7 +245,7 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
 int ff_dirac_parse_sequence_header(AVCodecContext *avctx, GetBitContext *gb,
                                    dirac_source_params *source)
 {
-    unsigned version_major, version_minor;
+    unsigned version_major, version_minor av_unused;
     unsigned video_format, picture_coding_mode;
 
     version_major  = svq3_get_ue_golomb(gb);
diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c
index cbecfa8a87..2ce7ea00b6 100644
--- a/libavcodec/rv10.c
+++ b/libavcodec/rv10.c
@@ -235,7 +235,7 @@ int rv_decode_dc(MpegEncContext *s, int n)
 /* read RV 1.0 compatible frame header */
 static int rv10_decode_picture_header(MpegEncContext *s)
 {
-    int mb_count, pb_frame, marker, unk, mb_xy;
+    int mb_count, pb_frame, marker, unk av_unused, mb_xy;
 
     marker = get_bits1(&s->gb);
 
diff --git a/libavcodec/s302m.c b/libavcodec/s302m.c
index d8b2b38a02..d707208b6c 100644
--- a/libavcodec/s302m.c
+++ b/libavcodec/s302m.c
@@ -29,7 +29,7 @@ static int s302m_parse_frame_header(AVCodecContext *avctx, const uint8_t *buf,
                                     int buf_size)
 {
     uint32_t h;
-    int frame_size, channels, id, bits;
+    int frame_size, channels, id av_unused, bits;
 
     if (buf_size <= AES3_HEADER_LEN) {
         av_log(avctx, AV_LOG_ERROR, "frame is too short\n");
diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index f50da4e87f..b2a05cc259 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -196,7 +196,7 @@ static int decode_wave_header(AVCodecContext *avctx, uint8_t *header, int header
 {
     GetBitContext hb;
     int len;
-    int chunk_size;
+    int chunk_size av_unused;
     short wave_format;
 
     init_get_bits(&hb, header, header_size*8);
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index 1af978f21b..0b56c101db 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -38,15 +38,12 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
     int buf_size = avpkt->size;
     AVPacket avpkt_recoded;
     const int qscale = 5;
-    const uint8_t *buf_ptr;
     uint8_t *recoded;
     int i = 0, j = 0;
 
     if (!avctx->width || !avctx->height)
         return -1;
 
-    buf_ptr = buf;
-
     recoded = av_mallocz(buf_size + 1024);
     if (!recoded)
         return -1;
diff --git a/libavcodec/truemotion2.c b/libavcodec/truemotion2.c
index 122049c957..f09a5edd99 100644
--- a/libavcodec/truemotion2.c
+++ b/libavcodec/truemotion2.c
@@ -201,7 +201,6 @@ static inline int tm2_read_header(TM2Context *ctx, const uint8_t *buf)
 {
     uint32_t magic;
     const uint8_t *obuf;
-    int length;
 
     obuf = buf;
 
@@ -212,7 +211,7 @@ static inline int tm2_read_header(TM2Context *ctx, const uint8_t *buf)
 /*      av_log (ctx->avctx, AV_LOG_ERROR, "TM2 old header: not implemented (yet)\n"); */
         return 40;
     } else if(magic == 0x00000101) { /* new header */
-        int w, h, size, flags, xr, yr;
+        av_unused int w, h, size, flags, xr, yr, length;
 
         length = AV_RL32(buf);
         buf += 4;
diff --git a/libavcodec/vaapi_mpeg2.c b/libavcodec/vaapi_mpeg2.c
index 3b3f6e0444..6c92a0ff72 100644
--- a/libavcodec/vaapi_mpeg2.c
+++ b/libavcodec/vaapi_mpeg2.c
@@ -109,7 +109,7 @@ static int vaapi_mpeg2_decode_slice(AVCodecContext *avctx, const uint8_t *buffer
     MpegEncContext * const s = avctx->priv_data;
     VASliceParameterBufferMPEG2 *slice_param;
     GetBitContext gb;
-    uint32_t start_code, quantiser_scale_code, intra_slice_flag, macroblock_offset;
+    uint32_t start_code av_unused, quantiser_scale_code, intra_slice_flag, macroblock_offset;
 
     av_dlog(avctx, "vaapi_mpeg2_decode_slice(): buffer %p, size %d\n", buffer, size);
 
diff --git a/libavcodec/vmdav.c b/libavcodec/vmdav.c
index 8d8bc61e42..934a52b939 100644
--- a/libavcodec/vmdav.c
+++ b/libavcodec/vmdav.c
@@ -199,7 +199,6 @@ static void vmd_decode(VmdVideoContext *s)
 
     int frame_x, frame_y;
     int frame_width, frame_height;
-    int dp_size;
 
     frame_x = AV_RL16(&s->buf[6]);
     frame_y = AV_RL16(&s->buf[8]);
@@ -247,7 +246,6 @@ static void vmd_decode(VmdVideoContext *s)
         }
 
         dp = &s->frame.data[0][frame_y * s->frame.linesize[0] + frame_x];
-        dp_size = s->frame.linesize[0] * s->avctx->height;
         pp = &s->prev_frame.data[0][frame_y * s->prev_frame.linesize[0] + frame_x];
         switch (meth) {
         case 1:
diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c
index 55aa7b936b..4c98987fea 100644
--- a/libavcodec/zmbvenc.c
+++ b/libavcodec/zmbvenc.c
@@ -181,7 +181,7 @@ static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void
         int x, y, bh2, bw2, xored;
         uint8_t *tsrc, *tprev;
         uint8_t *mv;
-        int mx, my, bv;
+        int mx, my;
 
         bw = (avctx->width + ZMBV_BLOCK - 1) / ZMBV_BLOCK;
         bh = (avctx->height + ZMBV_BLOCK - 1) / ZMBV_BLOCK;
@@ -197,7 +197,7 @@ static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void
                 tsrc = src + x;
                 tprev = prev + x;
 
-                bv = zmbv_me(c, tsrc, p->linesize[0], tprev, c->pstride, x, y, &mx, &my, &xored);
+                zmbv_me(c, tsrc, p->linesize[0], tprev, c->pstride, x, y, &mx, &my, &xored);
                 mv[0] = (mx << 1) | !!xored;
                 mv[1] = my << 1;
                 tprev += mx + my * c->pstride;
diff --git a/libavfilter/vf_fieldorder.c b/libavfilter/vf_fieldorder.c
index 59ca77821a..0913b6950e 100644
--- a/libavfilter/vf_fieldorder.c
+++ b/libavfilter/vf_fieldorder.c
@@ -153,7 +153,7 @@ static void end_frame(AVFilterLink *inlink)
     AVFilterBufferRef *inpicref   = inlink->cur_buf;
     AVFilterBufferRef *outpicref  = outlink->out_buf;
 
-    int               h, w, plane, line_step, line_size, line;
+    int               h, plane, line_step, line_size, line;
     uint8_t           *cpy_src, *cpy_dst;
 
     if (    inpicref->video->interlaced
@@ -162,7 +162,6 @@ static void end_frame(AVFilterLink *inlink)
                 "picture will move %s one line\n",
                 fieldorder->dst_tff ? "up" : "down");
         h = inpicref->video->h;
-        w = inpicref->video->w;
         for (plane = 0; plane < 4 && inpicref->data[plane]; plane++) {
             line_step = inpicref->linesize[plane];
             line_size = fieldorder->line_size[plane];
diff --git a/libavformat/4xm.c b/libavformat/4xm.c
index 699277248d..42ecbde581 100644
--- a/libavformat/4xm.c
+++ b/libavformat/4xm.c
@@ -246,7 +246,7 @@ static int fourxm_read_packet(AVFormatContext *s,
     FourxmDemuxContext *fourxm = s->priv_data;
     AVIOContext *pb = s->pb;
     unsigned int fourcc_tag;
-    unsigned int size, out_size;
+    unsigned int size, out_size av_unused;
     int ret = 0;
     unsigned int track_number;
     int packet_read = 0;
diff --git a/libavformat/apetag.c b/libavformat/apetag.c
index fb46d2bac2..4a464de1f5 100644
--- a/libavformat/apetag.c
+++ b/libavformat/apetag.c
@@ -35,7 +35,7 @@ static int ape_tag_read_field(AVFormatContext *s)
 {
     AVIOContext *pb = s->pb;
     uint8_t key[1024], *value;
-    uint32_t size, flags;
+    uint32_t size, flags av_unused;
     int i, c;
 
     size = avio_rl32(pb);  /* field size */
diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index 3564bf465f..e2efeef3ea 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -209,7 +209,7 @@ static int asf_read_stream_properties(AVFormatContext *s, int64_t size)
     ff_asf_guid g;
     enum AVMediaType type;
     int type_specific_size, sizeX;
-    uint64_t total_size;
+    uint64_t total_size av_unused;
     unsigned int tag1;
     int64_t pos1, pos2, start_time;
     int test_for_ext_stream_audio, is_dvr_ms_audio=0;
@@ -393,7 +393,7 @@ static int asf_read_ext_stream_properties(AVFormatContext *s, int64_t size)
     AVIOContext *pb = s->pb;
     ff_asf_guid g;
     int ext_len, payload_ext_ct, stream_ct, i;
-    uint32_t ext_d, leak_rate, stream_num;
+    uint32_t ext_d av_unused, leak_rate, stream_num;
     unsigned int stream_languageid_index;
 
     avio_rl64(pb); // starttime
@@ -511,7 +511,7 @@ static int asf_read_metadata(AVFormatContext *s, int64_t size)
 {
     AVIOContext *pb = s->pb;
     ASFContext *asf = s->priv_data;
-    int n, stream_num, name_len, value_len, value_type, value_num;
+    int n, stream_num, name_len, value_len, value_type av_unused, value_num;
     int ret, i;
     n = avio_rl16(pb);
 
@@ -626,7 +626,7 @@ static int asf_read_header(AVFormatContext *s, AVFormatParameters *ap)
             // if so the next iteration will pick it up
             continue;
         } else if (!ff_guidcmp(&g, &ff_asf_head1_guid)) {
-            int v1, v2;
+            av_unused int v1, v2;
             ff_get_guid(pb, &g);
             v1 = avio_rl32(pb);
             v2 = avio_rl16(pb);
@@ -799,7 +799,7 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb){
     ASFContext *asf = s->priv_data;
     int rsize = 1;
     int num = avio_r8(pb);
-    int64_t ts0, ts1;
+    int64_t ts0, ts1 av_unused;
 
     asf->packet_segments--;
     asf->packet_key_frame = num >> 7;
diff --git a/libavformat/mmf.c b/libavformat/mmf.c
index fc6fcc3caa..b75bee3a6b 100644
--- a/libavformat/mmf.c
+++ b/libavformat/mmf.c
@@ -186,7 +186,7 @@ static int mmf_read_header(AVFormatContext *s,
     unsigned int tag;
     AVIOContext *pb = s->pb;
     AVStream *st;
-    int64_t file_size, size;
+    int64_t file_size av_unused, size;
     int rate, params;
 
     tag = avio_rl32(pb);
@@ -263,12 +263,10 @@ static int mmf_read_packet(AVFormatContext *s,
                            AVPacket *pkt)
 {
     MMFContext *mmf = s->priv_data;
-    AVStream *st;
     int ret, size;
 
     if (url_feof(s->pb))
         return AVERROR(EIO);
-    st = s->streams[0];
 
     size = MAX_SIZE;
     if(size > mmf->data_size)
diff --git a/libavformat/mov.c b/libavformat/mov.c
index e51116b304..1af1cdeb63 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -469,21 +469,21 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 int ff_mov_read_esds(AVFormatContext *fc, AVIOContext *pb, MOVAtom atom)
 {
     AVStream *st;
-    int tag, len;
+    int tag;
 
     if (fc->nb_streams < 1)
         return 0;
     st = fc->streams[fc->nb_streams-1];
 
     avio_rb32(pb); /* version + flags */
-    len = ff_mp4_read_descr(fc, pb, &tag);
+    ff_mp4_read_descr(fc, pb, &tag);
     if (tag == MP4ESDescrTag) {
         avio_rb16(pb); /* ID */
         avio_r8(pb); /* priority */
     } else
         avio_rb16(pb); /* ID */
 
-    len = ff_mp4_read_descr(fc, pb, &tag);
+    ff_mp4_read_descr(fc, pb, &tag);
     if (tag == MP4DecConfigDescrTag)
         ff_mp4_read_dec_config_descr(fc, st, pb);
     return 0;
diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index 344bd1ccd8..968cdd90e3 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -197,8 +197,8 @@ static int ogg_read_page(AVFormatContext *s, int *str)
     int flags, nsegs;
     uint64_t gp;
     uint32_t serial;
-    uint32_t seq;
-    uint32_t crc;
+    uint32_t seq av_unused;
+    uint32_t crc av_unused;
     int size, idx;
     uint8_t sync[4];
     int sp = 0;
diff --git a/libavformat/oggparsecelt.c b/libavformat/oggparsecelt.c
index bbb695f438..2adc06046a 100644
--- a/libavformat/oggparsecelt.c
+++ b/libavformat/oggparsecelt.c
@@ -41,8 +41,8 @@ static int celt_header(AVFormatContext *s, int idx)
 
         /* Main header */
 
-        uint32_t version, header_size, sample_rate, nb_channels, frame_size;
-        uint32_t overlap, bytes_per_packet, extra_headers;
+        uint32_t version, header_size av_unused, sample_rate, nb_channels, frame_size;
+        uint32_t overlap, bytes_per_packet av_unused, extra_headers;
         uint8_t *extradata;
 
         extradata = av_malloc(2 * sizeof(uint32_t) +
diff --git a/libavformat/oggparseogm.c b/libavformat/oggparseogm.c
index dda5be601a..ec575e068d 100644
--- a/libavformat/oggparseogm.c
+++ b/libavformat/oggparseogm.c
@@ -39,7 +39,7 @@ ogm_header(AVFormatContext *s, int idx)
     const uint8_t *p = os->buf + os->pstart;
     uint64_t time_unit;
     uint64_t spu;
-    uint32_t default_len;
+    uint32_t default_len av_unused;
 
     if(!(*p & 1))
         return 0;
diff --git a/libavformat/rl2.c b/libavformat/rl2.c
index 93d4c34459..219aa697b6 100644
--- a/libavformat/rl2.c
+++ b/libavformat/rl2.c
@@ -80,8 +80,8 @@ static av_cold int rl2_read_header(AVFormatContext *s,
     unsigned int audio_frame_counter = 0;
     unsigned int video_frame_counter = 0;
     unsigned int back_size;
-    int data_size;
-    unsigned short encoding_method;
+    int data_size av_unused;
+    unsigned short encoding_method av_unused;
     unsigned short sound_rate;
     unsigned short rate;
     unsigned short channels;
diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
index 6fb362473e..5c24d021e0 100644
--- a/libavformat/rmdec.c
+++ b/libavformat/rmdec.c
@@ -280,7 +280,7 @@ ff_rm_read_mdpr_codecdata (AVFormatContext *s, AVIOContext *pb,
         if (rm_read_audio_stream_info(s, pb, st, rst, 0))
             return -1;
     } else {
-        int fps, fps2;
+        int fps, fps2 av_unused;
         if (avio_rl32(pb) != MKTAG('V', 'I', 'D', 'O')) {
         fail1:
             av_log(st->codec, AV_LOG_ERROR, "Unsupported video codec\n");
diff --git a/libavformat/rpl.c b/libavformat/rpl.c
index 9702fc7035..be5bf9c788 100644
--- a/libavformat/rpl.c
+++ b/libavformat/rpl.c
@@ -299,7 +299,7 @@ static int rpl_read_packet(AVFormatContext *s, AVPacket *pkt)
         stream->codec->codec_tag == 124) {
         // We have to split Escape 124 frames because there are
         // multiple frames per chunk in Escape 124 samples.
-        uint32_t frame_size, frame_flags;
+        uint32_t frame_size, frame_flags av_unused;
 
         frame_flags = avio_rl32(pb);
         frame_size = avio_rl32(pb);
diff --git a/libavformat/rtpdec_latm.c b/libavformat/rtpdec_latm.c
index 2b366a0dc2..74754dfd60 100644
--- a/libavformat/rtpdec_latm.c
+++ b/libavformat/rtpdec_latm.c
@@ -108,7 +108,7 @@ static int parse_fmtp_config(AVStream *st, char *value)
     int len = ff_hex_to_data(NULL, value), i, ret = 0;
     GetBitContext gb;
     uint8_t *config;
-    int audio_mux_version, same_time_framing, num_sub_frames,
+    int audio_mux_version, same_time_framing, num_sub_frames av_unused,
         num_programs, num_layers;
 
     /* Pad this buffer, too, to avoid out of bounds reads with get_bits below */
diff --git a/libavformat/sauce.c b/libavformat/sauce.c
index 41e991e3e7..0b22a75bb9 100644
--- a/libavformat/sauce.c
+++ b/libavformat/sauce.c
@@ -32,7 +32,7 @@ int ff_sauce_read(AVFormatContext *avctx, uint64_t *fsize, int *got_width, int g
 {
     AVIOContext *pb = avctx->pb;
     char buf[36];
-    int datatype, filetype, t1, t2, nb_comments, flags;
+    int datatype, filetype, t1, t2, nb_comments, flags av_unused;
     uint64_t start_pos = avio_size(pb) - 128;
 
     avio_seek(pb, start_pos, SEEK_SET);
diff --git a/libavformat/smacker.c b/libavformat/smacker.c
index 2603a4d71c..29a66e79fa 100644
--- a/libavformat/smacker.c
+++ b/libavformat/smacker.c
@@ -233,7 +233,6 @@ static int smacker_read_packet(AVFormatContext *s, AVPacket *pkt)
     int i;
     int frame_size = 0;
     int palchange = 0;
-    int pos;
 
     if (url_feof(s->pb) || smk->cur_frame >= smk->frames)
         return AVERROR_EOF;
@@ -244,7 +243,6 @@ static int smacker_read_packet(AVFormatContext *s, AVPacket *pkt)
         frame_size = smk->frm_size[smk->cur_frame] & (~3);
         flags = smk->frm_flags[smk->cur_frame];
         /* handle palette change event */
-        pos = avio_tell(s->pb);
         if(flags & SMACKER_PAL){
             int size, sz, t, off, j, pos;
             uint8_t *pal = smk->pal;
diff --git a/libavformat/sol.c b/libavformat/sol.c
index 7ad894e304..70fddae92a 100644
--- a/libavformat/sol.c
+++ b/libavformat/sol.c
@@ -85,7 +85,7 @@ static int sol_channels(int magic, int type)
 static int sol_read_header(AVFormatContext *s,
                           AVFormatParameters *ap)
 {
-    int size;
+    int size av_unused;
     unsigned int magic,tag;
     AVIOContext *pb = s->pb;
     unsigned int id, channels, rate, type;
diff --git a/libavformat/yuv4mpeg.c b/libavformat/yuv4mpeg.c
index a852568a64..445ec3701b 100644
--- a/libavformat/yuv4mpeg.c
+++ b/libavformat/yuv4mpeg.c
@@ -94,7 +94,7 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt)
     AVPicture *picture;
     int* first_pkt = s->priv_data;
     int width, height, h_chroma_shift, v_chroma_shift;
-    int i, m;
+    int i;
     char buf2[Y4M_LINE_MAX+1];
     char buf1[20];
     uint8_t *ptr, *ptr1, *ptr2;
@@ -114,7 +114,7 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt)
 
     /* construct frame header */
 
-    m = snprintf(buf1, sizeof(buf1), "%s\n", Y4M_FRAME_MAGIC);
+    snprintf(buf1, sizeof(buf1), "%s\n", Y4M_FRAME_MAGIC);
     avio_write(pb, buf1, strlen(buf1));
 
     width = st->codec->width;

From a22500744bf878e1c8f9dfd1c6695e861f544780 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 30 May 2011 02:36:49 +0200
Subject: [PATCH 452/830] Revert "Timeout TCP open() after 5 seconds."

This code is redundant and conflicts with Luca's reimplementation of it.

This reverts commit a2f5e14a867768019b49b830e29801f1bfb2abb7.
---
 libavformat/tcp.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/libavformat/tcp.c b/libavformat/tcp.c
index b144e31457..963a993495 100644
--- a/libavformat/tcp.c
+++ b/libavformat/tcp.c
@@ -97,7 +97,6 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
     }
 
     if (ret < 0) {
-        int timeout=50;
         struct pollfd p = {fd, POLLOUT, 0};
         ret = ff_neterrno();
         if (ret == AVERROR(EINTR)) {
@@ -120,12 +119,6 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
             ret = poll(&p, 1, 100);
             if (ret > 0)
                 break;
-            if(!--timeout){
-                av_log(NULL, AV_LOG_ERROR,
-                    "TCP open %s:%d timeout\n",
-                    hostname, port);
-                goto fail;
-            }
         }
         if (ret <= 0) {
             ret = AVERROR(ETIMEDOUT);

From 35bed44fc95eb1165f75b0c513d743f79ce8410c Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 30 May 2011 02:39:26 +0200
Subject: [PATCH 453/830] TCP: change default timeout to 5sec

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/tcp.c b/libavformat/tcp.c
index 963a993495..eb982d5675 100644
--- a/libavformat/tcp.c
+++ b/libavformat/tcp.c
@@ -45,7 +45,7 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
     char buf[256];
     int ret;
     socklen_t optlen;
-    int timeout = 100;
+    int timeout = 50;
     char hostname[1024],proto[1024],path[1024];
     char portstr[10];
 

From 2c10ee23583d80a44f65a2199cef96eafaaceade Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 30 May 2011 03:11:54 +0200
Subject: [PATCH 454/830] error.c: fix compile flags

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavutil/error.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavutil/error.c b/libavutil/error.c
index 5a1ddae5ab..d296395c09 100644
--- a/libavutil/error.c
+++ b/libavutil/error.c
@@ -16,6 +16,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#undef _GNU_SOURCE
 #include "avutil.h"
 #include "avstring.h"
 

From e1197b9e1746c03b1d13d816d1569aeaf1b71ecc Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 29 May 2011 17:57:40 +0200
Subject: [PATCH 455/830] swscale: remove sws_format_name()

Use av_get_pix_fmt_name() instead.
---
 libswscale/bfin/yuv2rgb_bfin.c   |  2 +-
 libswscale/ppc/yuv2rgb_altivec.c |  4 ++--
 libswscale/swscale.c             |  4 ++--
 libswscale/swscale_internal.h    |  2 --
 libswscale/utils.c               | 18 +++++-------------
 libswscale/yuv2rgb.c             |  4 +++-
 6 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c
index eaa83eaf3b..a2be1df0ed 100644
--- a/libswscale/bfin/yuv2rgb_bfin.c
+++ b/libswscale/bfin/yuv2rgb_bfin.c
@@ -197,7 +197,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c)
     }
 
     av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n",
-           sws_format_name (c->dstFormat));
+           av_get_pix_fmt_name(c->dstFormat));
 
     return f;
 }
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 11d12150a8..ad956b92a1 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -718,7 +718,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
                 static int printed_error_message;
                 if (!printed_error_message) {
                     av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
-                           sws_format_name(c->dstFormat));
+                           av_get_pix_fmt_name(c->dstFormat));
                     printed_error_message=1;
                 }
                 return;
@@ -793,7 +793,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
         default:
             /* Unreachable, I think. */
             av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
-                   sws_format_name(c->dstFormat));
+                   av_get_pix_fmt_name(c->dstFormat));
             return;
         }
 
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 227126b182..76096e7593 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1492,7 +1492,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
 
     if (!conv)
         av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-               sws_format_name(srcFormat), sws_format_name(dstFormat));
+               av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
     else {
         for (i=0; i<srcSliceH; i++) {
             conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
@@ -1579,7 +1579,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
 
     if (!conv) {
         av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-               sws_format_name(srcFormat), sws_format_name(dstFormat));
+               av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
     } else {
         const uint8_t *srcPtr= src[0];
               uint8_t *dstPtr= dst[0];
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 327919fb55..8577448c38 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -345,8 +345,6 @@ void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
                             const int16_t **chrVSrc, int chrFilterSize,
                             uint8_t *dest, int dstW, int dstY);
 
-const char *sws_format_name(enum PixelFormat format);
-
 //FIXME replace this with something faster
 #define is16BPS(x)      (           \
            (x)==PIX_FMT_GRAY16BE    \
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 23ad8279cf..3801ea2fd7 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -165,14 +165,6 @@ int sws_isSupportedOutput(enum PixelFormat pix_fmt)
 
 extern const int32_t ff_yuv2rgb_coeffs[8][4];
 
-const char *sws_format_name(enum PixelFormat format)
-{
-    if ((unsigned)format < PIX_FMT_NB && av_pix_fmt_descriptors[format].name)
-        return av_pix_fmt_descriptors[format].name;
-    else
-        return "Unknown format";
-}
-
 static double getSplineCoeff(double a, double b, double c, double d, double dist)
 {
     if (dist<=1.0) return ((d*dist + c)*dist + b)*dist +a;
@@ -766,11 +758,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     unscaled = (srcW == dstW && srcH == dstH);
 
     if (!isSupportedIn(srcFormat)) {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
+        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", av_get_pix_fmt_name(srcFormat));
         return AVERROR(EINVAL);
     }
     if (!isSupportedOut(dstFormat)) {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
+        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", av_get_pix_fmt_name(dstFormat));
         return AVERROR(EINVAL);
     }
 
@@ -845,7 +837,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         if (c->swScale) {
             if (flags&SWS_PRINT_INFO)
                 av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n",
-                       sws_format_name(srcFormat), sws_format_name(dstFormat));
+                       av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
             return 0;
         }
     }
@@ -1042,7 +1034,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         else                              av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");
 
         av_log(c, AV_LOG_INFO, "from %s to %s%s ",
-               sws_format_name(srcFormat),
+               av_get_pix_fmt_name(srcFormat),
 #ifdef DITHER1XBPP
                dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 ||
                dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE ||
@@ -1050,7 +1042,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 #else
                "",
 #endif
-               sws_format_name(dstFormat));
+               av_get_pix_fmt_name(dstFormat));
 
         if      (HAVE_MMX2     && cpu_flags & AV_CPU_FLAG_MMX2)    av_log(c, AV_LOG_INFO, "using MMX2\n");
         else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)   av_log(c, AV_LOG_INFO, "using 3DNOW\n");
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index c9bd1b7337..36182a5ea9 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -34,6 +34,7 @@
 #include "swscale_internal.h"
 #include "libavutil/cpu.h"
 #include "libavutil/bswap.h"
+#include "libavutil/pixdesc.h"
 
 extern const uint8_t dither_4x4_16[4][8];
 extern const uint8_t dither_8x8_32[8][8];
@@ -521,7 +522,8 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
     if (t)
         return t;
 
-    av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", sws_format_name(c->srcFormat), sws_format_name(c->dstFormat));
+    av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n",
+           av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat));
 
     switch (c->dstFormat) {
     case PIX_FMT_BGR48BE:

From 50fee0fc8b2540eade190575ab7802826597bf32 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 30 May 2011 01:42:45 +0200
Subject: [PATCH 456/830] rawdec: fail in case of unknown pixel format

---
 libavcodec/rawdec.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index f8e119b017..503351e68a 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -97,6 +97,11 @@ static av_cold int raw_init_decoder(AVCodecContext *avctx)
     else if (avctx->pix_fmt == PIX_FMT_NONE && avctx->bits_per_coded_sample)
         avctx->pix_fmt = ff_find_pix_fmt(pix_fmt_bps_avi, avctx->bits_per_coded_sample);
 
+    if (avctx->pix_fmt == PIX_FMT_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "Pixel format was not specified and cannot be detected\n");
+        return AVERROR(EINVAL);
+    }
+
     ff_set_systematic_pal2(context->palette, avctx->pix_fmt);
     context->length = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
     if((avctx->bits_per_coded_sample == 4 || avctx->bits_per_coded_sample == 2) &&

From 7b017086d4262199893117e19755763b7755f03d Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 28 May 2011 21:02:14 +0200
Subject: [PATCH 457/830] v4l2: replace memset() with explicit struct
 initialization

---
 libavdevice/v4l2.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 9bbd01df9e..bff40c8902 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -153,10 +153,9 @@ static int device_init(AVFormatContext *ctx, int *width, int *height, uint32_t p
 {
     struct video_data *s = ctx->priv_data;
     int fd = s->fd;
-    struct v4l2_format fmt;
+    struct v4l2_format fmt = {0};
     int res;
 
-    memset(&fmt, 0, sizeof(struct v4l2_format));
     fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
     fmt.fmt.pix.width = *width;
     fmt.fmt.pix.height = *height;
@@ -239,10 +238,9 @@ static enum CodecID fmt_v4l2codec(uint32_t v4l2_fmt)
 static int mmap_init(AVFormatContext *ctx)
 {
     struct video_data *s = ctx->priv_data;
-    struct v4l2_requestbuffers req;
+    struct v4l2_requestbuffers req = {0};
     int i, res;
 
-    memset(&req, 0, sizeof(struct v4l2_requestbuffers));
     req.count = desired_video_buffers;
     req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
     req.memory = V4L2_MEMORY_MMAP;
@@ -274,9 +272,8 @@ static int mmap_init(AVFormatContext *ctx)
     }
 
     for (i = 0; i < req.count; i++) {
-        struct v4l2_buffer buf;
+        struct v4l2_buffer buf = {0};
 
-        memset(&buf, 0, sizeof(struct v4l2_buffer));
         buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
         buf.memory = V4L2_MEMORY_MMAP;
         buf.index = i;
@@ -310,7 +307,7 @@ static int read_init(AVFormatContext *ctx)
 
 static void mmap_release_buffer(AVPacket *pkt)
 {
-    struct v4l2_buffer buf;
+    struct v4l2_buffer buf = {0};
     int res, fd;
     struct buff_data *buf_descriptor = pkt->priv;
 
@@ -318,7 +315,6 @@ static void mmap_release_buffer(AVPacket *pkt)
          return;
     }
 
-    memset(&buf, 0, sizeof(struct v4l2_buffer));
     buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
     buf.memory = V4L2_MEMORY_MMAP;
     buf.index = buf_descriptor->index;
@@ -336,11 +332,10 @@ static void mmap_release_buffer(AVPacket *pkt)
 static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
 {
     struct video_data *s = ctx->priv_data;
-    struct v4l2_buffer buf;
+    struct v4l2_buffer buf = {0};
     struct buff_data *buf_descriptor;
     int res;
 
-    memset(&buf, 0, sizeof(struct v4l2_buffer));
     buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
     buf.memory = V4L2_MEMORY_MMAP;
 
@@ -395,9 +390,8 @@ static int mmap_start(AVFormatContext *ctx)
     int i, res;
 
     for (i = 0; i < s->buffers; i++) {
-        struct v4l2_buffer buf;
+        struct v4l2_buffer buf = {0};
 
-        memset(&buf, 0, sizeof(struct v4l2_buffer));
         buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
         buf.memory = V4L2_MEMORY_MMAP;
         buf.index  = i;
@@ -439,8 +433,8 @@ static void mmap_close(struct video_data *s)
 static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
 {
     struct video_data *s = s1->priv_data;
-    struct v4l2_input input;
-    struct v4l2_standard standard;
+    struct v4l2_input input = {0};
+    struct v4l2_standard standard = {0};
     struct v4l2_streamparm streamparm = { 0 };
     struct v4l2_fract *tpf = &streamparm.parm.capture.timeperframe;
     int i;
@@ -453,7 +447,6 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
 #endif
 
     /* set tv video input */
-    memset (&input, 0, sizeof (input));
     input.index = s->channel;
     if (ioctl(s->fd, VIDIOC_ENUMINPUT, &input) < 0) {
         av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl enum input failed:\n");
@@ -479,7 +472,6 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
         av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set standard: %s\n",
                s->standard);
         /* set tv standard */
-        memset (&standard, 0, sizeof (standard));
         for(i=0;;i++) {
             standard.index = i;
             if (ioctl(s->fd, VIDIOC_ENUMSTD, &standard) < 0) {

From 2d48515eb7a6db81ca067db0f2b986c58360115a Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 28 May 2011 21:04:29 +0200
Subject: [PATCH 458/830] v4l2: perform minor style fixes

---
 libavdevice/v4l2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index bff40c8902..d1bf572418 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -350,7 +350,7 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
 
         return AVERROR(errno);
     }
-    assert (buf.index < s->buffers);
+    assert(buf.index < s->buffers);
     if (s->frame_size > 0 && buf.bytesused != s->frame_size) {
         av_log(ctx, AV_LOG_ERROR, "The v4l2 frame is %d bytes, but %d bytes are expected\n", buf.bytesused, s->frame_size);
         return AVERROR_INVALIDDATA;
@@ -435,7 +435,7 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
     struct video_data *s = s1->priv_data;
     struct v4l2_input input = {0};
     struct v4l2_standard standard = {0};
-    struct v4l2_streamparm streamparm = { 0 };
+    struct v4l2_streamparm streamparm = {0};
     struct v4l2_fract *tpf = &streamparm.parm.capture.timeperframe;
     int i;
 
@@ -472,7 +472,7 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
         av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set standard: %s\n",
                s->standard);
         /* set tv standard */
-        for(i=0;;i++) {
+        for (i = 0;; i++) {
             standard.index = i;
             if (ioctl(s->fd, VIDIOC_ENUMSTD, &standard) < 0) {
                 av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set standard(%s) failed\n",

From 7533a727f9efed2d0eb6c8b452ff9e35214f5c8e Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 28 May 2011 21:14:06 +0200
Subject: [PATCH 459/830] v4l2: rewrite code iterating the supported standards

Simplify/clarify the code logic and error reporting.
---
 libavdevice/v4l2.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index d1bf572418..605b7b748e 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -437,7 +437,7 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
     struct v4l2_standard standard = {0};
     struct v4l2_streamparm streamparm = {0};
     struct v4l2_fract *tpf = &streamparm.parm.capture.timeperframe;
-    int i;
+    int i, ret;
 
     streamparm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 
@@ -474,15 +474,13 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
         /* set tv standard */
         for (i = 0;; i++) {
             standard.index = i;
-            if (ioctl(s->fd, VIDIOC_ENUMSTD, &standard) < 0) {
-                av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set standard(%s) failed\n",
-                       s->standard);
-                return AVERROR(EIO);
-            }
-
-            if (!strcasecmp(standard.name, s->standard)) {
+            ret = ioctl(s->fd, VIDIOC_ENUMSTD, &standard);
+            if (ret < 0 || !strcasecmp(standard.name, s->standard))
                 break;
-            }
+        }
+        if (ret < 0) {
+            av_log(s1, AV_LOG_ERROR, "Unknown standard '%s'\n", s->standard);
+            return ret;
         }
 
         av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set standard: %s, id: %"PRIu64"\n",

From f712f6c8a4bd14bae3c41118af642b5dae7f6e2b Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 29 May 2011 10:42:44 +0200
Subject: [PATCH 460/830] ffmpeg: simplify opt_*_codec() options

Replace opt_{audio,video,subtitle,data}_codec() with a single
opt_codec() function.
---
 ffmpeg.c | 63 +++++++++++++++++++++-----------------------------------
 1 file changed, 23 insertions(+), 40 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 29f6950d08..220feb298d 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -2999,9 +2999,15 @@ static int opt_video_standard(const char *opt, const char *arg)
     return 0;
 }
 
-static int opt_codec(int *pstream_copy, char **pcodec_name,
-                      int codec_type, const char *arg)
+static int opt_codec(const char *opt, const char *arg)
 {
+    int *pstream_copy; char **pcodec_name; enum AVMediaType codec_type;
+
+    if      (!strcmp(opt, "acodec")) { pstream_copy = &audio_stream_copy;    pcodec_name = &audio_codec_name;    codec_type = AVMEDIA_TYPE_AUDIO;    }
+    else if (!strcmp(opt, "vcodec")) { pstream_copy = &video_stream_copy;    pcodec_name = &video_codec_name;    codec_type = AVMEDIA_TYPE_VIDEO;    }
+    else if (!strcmp(opt, "scodec")) { pstream_copy = &subtitle_stream_copy; pcodec_name = &subtitle_codec_name; codec_type = AVMEDIA_TYPE_SUBTITLE; }
+    else if (!strcmp(opt, "dcodec")) { pstream_copy = &data_stream_copy;     pcodec_name = &data_codec_name;     codec_type = AVMEDIA_TYPE_DATA;     }
+
     av_freep(pcodec_name);
     if (!strcmp(arg, "copy")) {
         *pstream_copy = 1;
@@ -3011,26 +3017,6 @@ static int opt_codec(int *pstream_copy, char **pcodec_name,
     return 0;
 }
 
-static int opt_audio_codec(const char *opt, const char *arg)
-{
-    return opt_codec(&audio_stream_copy, &audio_codec_name, AVMEDIA_TYPE_AUDIO, arg);
-}
-
-static int opt_video_codec(const char *opt, const char *arg)
-{
-    return opt_codec(&video_stream_copy, &video_codec_name, AVMEDIA_TYPE_VIDEO, arg);
-}
-
-static int opt_subtitle_codec(const char *opt, const char *arg)
-{
-    return opt_codec(&subtitle_stream_copy, &subtitle_codec_name, AVMEDIA_TYPE_SUBTITLE, arg);
-}
-
-static int opt_data_codec(const char *opt, const char *arg)
-{
-    return opt_codec(&data_stream_copy, &data_codec_name, AVMEDIA_TYPE_DATA, arg);
-}
-
 static int opt_codec_tag(const char *opt, const char *arg)
 {
     char *tail;
@@ -4172,8 +4158,8 @@ static int opt_target(const char *opt, const char *arg)
     }
 
     if(!strcmp(arg, "vcd")) {
-        opt_video_codec("vcodec", "mpeg1video");
-        opt_audio_codec("vcodec", "mp2");
+        opt_codec("vcodec", "mpeg1video");
+        opt_codec("vcodec", "mp2");
         opt_format("f", "vcd");
 
         opt_frame_size("s", norm == PAL ? "352x288" : "352x240");
@@ -4200,8 +4186,8 @@ static int opt_target(const char *opt, const char *arg)
         mux_preload= (36000+3*1200) / 90000.0; //0.44
     } else if(!strcmp(arg, "svcd")) {
 
-        opt_video_codec("vcodec", "mpeg2video");
-        opt_audio_codec("acodec", "mp2");
+        opt_codec("vcodec", "mpeg2video");
+        opt_codec("acodec", "mp2");
         opt_format("f", "svcd");
 
         opt_frame_size("s", norm == PAL ? "480x576" : "480x480");
@@ -4222,8 +4208,8 @@ static int opt_target(const char *opt, const char *arg)
 
     } else if(!strcmp(arg, "dvd")) {
 
-        opt_video_codec("vcodec", "mpeg2video");
-        opt_audio_codec("vcodec", "ac3");
+        opt_codec("vcodec", "mpeg2video");
+        opt_codec("vcodec", "ac3");
         opt_format("f", "dvd");
 
         opt_frame_size("vcodec", norm == PAL ? "720x576" : "720x480");
@@ -4321,14 +4307,11 @@ static int opt_preset(const char *opt, const char *arg)
             fprintf(stderr, "%s: Invalid syntax: '%s'\n", filename, line);
             ffmpeg_exit(1);
         }
-        if(!strcmp(tmp, "acodec")){
-            opt_audio_codec(tmp, tmp2);
-        }else if(!strcmp(tmp, "vcodec")){
-            opt_video_codec(tmp, tmp2);
-        }else if(!strcmp(tmp, "scodec")){
-            opt_subtitle_codec(tmp, tmp2);
-        }else if(!strcmp(tmp, "dcodec")){
-            opt_data_codec(tmp, tmp2);
+        if (!strcmp(tmp, "acodec") ||
+            !strcmp(tmp, "vcodec") ||
+            !strcmp(tmp, "scodec") ||
+            !strcmp(tmp, "dcodec")) {
+            opt_codec(tmp, tmp2);
         }else if(opt_default(tmp, tmp2) < 0){
             fprintf(stderr, "%s: Invalid option or argument: '%s', parsed as '%s' = '%s'\n", filename, line, tmp, tmp2);
             ffmpeg_exit(1);
@@ -4417,7 +4400,7 @@ static const OptionDef options[] = {
     { "vdt", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&video_discard}, "discard threshold", "n" },
     { "qscale", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qscale}, "use fixed video quantizer scale (VBR)", "q" },
     { "rc_override", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_video_rc_override_string}, "rate control override for specific intervals", "override" },
-    { "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_video_codec}, "force video codec ('copy' to copy stream)", "codec" },
+    { "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_codec}, "force video codec ('copy' to copy stream)", "codec" },
     { "me_threshold", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_me_threshold}, "motion estimaton threshold",  "threshold" },
     { "sameq", OPT_BOOL | OPT_VIDEO, {(void*)&same_quality},
       "use same quantizer as source (implies VBR)" },
@@ -4450,7 +4433,7 @@ static const OptionDef options[] = {
     { "ar", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_rate}, "set audio sampling rate (in Hz)", "rate" },
     { "ac", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_channels}, "set number of audio channels", "channels" },
     { "an", OPT_BOOL | OPT_AUDIO, {(void*)&audio_disable}, "disable audio" },
-    { "acodec", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_codec}, "force audio codec ('copy' to copy stream)", "codec" },
+    { "acodec", HAS_ARG | OPT_AUDIO, {(void*)opt_codec}, "force audio codec ('copy' to copy stream)", "codec" },
     { "atag", HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_codec_tag}, "force audio tag/fourcc", "fourcc/tag" },
     { "vol", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&audio_volume}, "change audio volume (256=normal)" , "volume" }, //
     { "newaudio", OPT_AUDIO, {(void*)opt_new_stream}, "add a new audio stream to the current output stream" },
@@ -4459,7 +4442,7 @@ static const OptionDef options[] = {
 
     /* subtitle options */
     { "sn", OPT_BOOL | OPT_SUBTITLE, {(void*)&subtitle_disable}, "disable subtitle" },
-    { "scodec", HAS_ARG | OPT_SUBTITLE, {(void*)opt_subtitle_codec}, "force subtitle codec ('copy' to copy stream)", "codec" },
+    { "scodec", HAS_ARG | OPT_SUBTITLE, {(void*)opt_codec}, "force subtitle codec ('copy' to copy stream)", "codec" },
     { "newsubtitle", OPT_SUBTITLE, {(void*)opt_new_stream}, "add a new subtitle stream to the current output stream" },
     { "slang", HAS_ARG | OPT_STRING | OPT_SUBTITLE, {(void *)&subtitle_language}, "set the ISO 639 language code (3 letters) of the current subtitle stream" , "code" },
     { "stag", HAS_ARG | OPT_EXPERT | OPT_SUBTITLE, {(void*)opt_codec_tag}, "force subtitle tag/fourcc", "fourcc/tag" },
@@ -4482,7 +4465,7 @@ static const OptionDef options[] = {
     { "spre", HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_preset}, "set the subtitle options to the indicated preset", "preset" },
     { "fpre", HAS_ARG | OPT_EXPERT, {(void*)opt_preset}, "set options from indicated preset file", "filename" },
     /* data codec support */
-    { "dcodec", HAS_ARG | OPT_DATA, {(void*)opt_data_codec}, "force data codec ('copy' to copy stream)", "codec" },
+    { "dcodec", HAS_ARG | OPT_DATA, {(void*)opt_codec}, "force data codec ('copy' to copy stream)", "codec" },
 
     { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
     { NULL, },

From 9362b5094114bd2f0cea74691ad900b19dac64f8 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 29 May 2011 12:47:40 +0200
Subject: [PATCH 461/830] examples: move API examples to a dedicated dir in doc

---
 doc/examples/Makefile                         | 21 +++++++++++++++++++
 {libavcodec => doc/examples}/api-example.c    |  0
 .../examples}/output-example.c                |  0
 libavcodec/Makefile                           |  2 --
 libavformat/Makefile                          |  1 -
 5 files changed, 21 insertions(+), 3 deletions(-)
 create mode 100644 doc/examples/Makefile
 rename {libavcodec => doc/examples}/api-example.c (100%)
 rename {libavformat => doc/examples}/output-example.c (100%)

diff --git a/doc/examples/Makefile b/doc/examples/Makefile
new file mode 100644
index 0000000000..facbd7e178
--- /dev/null
+++ b/doc/examples/Makefile
@@ -0,0 +1,21 @@
+# use pkg-config for getting CFLAGS and LDFLAGS
+FFMPEG_LIBS=libavdevice libavformat libavfilter libavcodec libswscale libavutil
+CFLAGS+=$(shell pkg-config --cflags $(FFMPEG_LIBS))
+LDFLAGS+=$(shell pkg-config --libs $(FFMPEG_LIBS))
+
+EXAMPLES=api-example output-example
+
+OBJS=$(addsuffix .o,$(EXAMPLES))
+
+%: %.o
+	$(CC) $< $(LDFLAGS) -o $@
+
+%.o: %.c
+	$(CC) $< $(CFLAGS) -c -o $@
+
+.PHONY: all clean
+
+all: $(OBJS) $(EXAMPLES)
+
+clean:
+	rm -rf $(EXAMPLES) $(OBJS)
diff --git a/libavcodec/api-example.c b/doc/examples/api-example.c
similarity index 100%
rename from libavcodec/api-example.c
rename to doc/examples/api-example.c
diff --git a/libavformat/output-example.c b/doc/examples/output-example.c
similarity index 100%
rename from libavformat/output-example.c
rename to doc/examples/output-example.c
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 95d08eaaa8..76cb8cd161 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -674,8 +674,6 @@ SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_internal.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h
 SKIPHEADERS-$(CONFIG_XVMC)             += xvmc.h
 
-EXAMPLES = api
-
 TESTPROGS = cabac dct eval fft fft-fixed h264 iirfilter rangecoder snow
 TESTPROGS-$(HAVE_MMX) += motion
 TESTOBJS = dctref.o
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 55f6152f8d..6d3a0276de 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -340,7 +340,6 @@ OBJS-$(CONFIG_UDP_PROTOCOL)              += udp.o
 # libavdevice dependencies
 OBJS-$(CONFIG_JACK_INDEV)                += timefilter.o
 
-EXAMPLES  = output
 TESTPROGS = timefilter
 
 include $(SUBDIR)../subdir.mak

From f501c74d9916187206e8bc23c7ff041d91098677 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 30 May 2011 09:12:48 +0200
Subject: [PATCH 462/830] output-example: create @file doxy from text in the
 copyright header

---
 doc/examples/output-example.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/doc/examples/output-example.c b/doc/examples/output-example.c
index f174305fe6..f068e946dd 100644
--- a/doc/examples/output-example.c
+++ b/doc/examples/output-example.c
@@ -1,7 +1,4 @@
 /*
- * Libavformat API example: Output a media file in any supported
- * libavformat format. The default codecs are used.
- *
  * Copyright (c) 2003 Fabrice Bellard
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -22,6 +19,13 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+
+/**
+ * @file
+ * Libavformat API example: Output a media file in any supported
+ * libavformat format. The default codecs are used.
+ */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>

From c328122a8d1d51873b1555ef41b08e5621de6e99 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 30 May 2011 09:14:48 +0200
Subject: [PATCH 463/830] api-example: uppercase first letter in "copyright"

Improve consistency.
---
 doc/examples/api-example.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/examples/api-example.c b/doc/examples/api-example.c
index f34075e666..a32c09ab69 100644
--- a/doc/examples/api-example.c
+++ b/doc/examples/api-example.c
@@ -1,5 +1,5 @@
 /*
- * copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2001 Fabrice Bellard
  *
  * This file is part of FFmpeg.
  *

From f86d260df305ebde41093cb6de6de03e0d076356 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <tomas.hardin@codemill.se>
Date: Fri, 27 May 2011 22:01:32 +0200
Subject: [PATCH 464/830] wav: Don't avio_seek() if we know we'll run into EOF

Since we want to break the loop the 'if (data_ofs < 0)' block is moved after the loop.
This fixes ticket #250.
---
 libavformat/wav.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index 61261e6807..b475d0cb49 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -341,14 +341,8 @@ static int wav_read_header(AVFormatContext *s,
         size = next_tag(pb, &tag);
         next_tag_ofs = avio_tell(pb) + size;
 
-        if (url_feof(pb)) {
-            if (data_ofs < 0) {
-                av_log(s, AV_LOG_ERROR, "no 'data' tag found\n");
-                return AVERROR_INVALIDDATA;
-            }
-
+        if (url_feof(pb))
             break;
-        }
 
         switch (tag) {
         case MKTAG('f', 'm', 't', ' '):
@@ -370,16 +364,16 @@ static int wav_read_header(AVFormatContext *s,
                 next_tag_ofs = wav->data_end = avio_tell(pb) + data_size;
             } else {
                 data_size = size;
-                wav->data_end = size ? next_tag_ofs : INT64_MAX;
+                next_tag_ofs = wav->data_end = size ? next_tag_ofs : INT64_MAX;
             }
 
+            data_ofs = avio_tell(pb);
+
             /* don't look for footer metadata if we can't seek or if we don't
              * know where the data tag ends
              */
             if (!pb->seekable || (!rf64 && !size))
                 goto break_loop;
-
-            data_ofs = avio_tell(pb);
             break;
         case MKTAG('f','a','c','t'):
             if(!sample_count)
@@ -390,11 +384,20 @@ static int wav_read_header(AVFormatContext *s,
                 return ret;
             break;
         }
-        avio_seek(pb, next_tag_ofs, SEEK_SET);
+
+        /* seek to next tag unless we know that we'll run into EOF */
+        if ((avio_size(pb) > 0 && next_tag_ofs >= avio_size(pb)) ||
+            avio_seek(pb, next_tag_ofs, SEEK_SET) < 0) {
+            break;
+        }
     }
 break_loop:
-    if (data_ofs >= 0)
-        avio_seek(pb, data_ofs, SEEK_SET);
+    if (data_ofs < 0) {
+        av_log(s, AV_LOG_ERROR, "no 'data' tag found\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    avio_seek(pb, data_ofs, SEEK_SET);
 
     if (!sample_count && st->codec->channels && av_get_bits_per_sample(st->codec->codec_id))
         sample_count = (data_size<<3) / (st->codec->channels * (uint64_t)av_get_bits_per_sample(st->codec->codec_id));

From b7b62c3a53b3d543450451176e88806d21a49b9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Mon, 30 May 2011 19:35:32 +0200
Subject: [PATCH 465/830] Include pixdesc.h for av_get_pix_fmt_name.

Fixes compilation on PPC with Altivec enabled.
---
 libswscale/ppc/yuv2rgb_altivec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index ad956b92a1..626d55f5f7 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -95,6 +95,7 @@ adjustment.
 #include "libswscale/swscale.h"
 #include "libswscale/swscale_internal.h"
 #include "libavutil/cpu.h"
+#include "libavutil/pixdesc.h"
 
 #undef PROFILE_THE_BEAST
 #undef INC_SCALING

From 70564983c5e3a4e62426510aee8cd27bc636774e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Mon, 30 May 2011 20:02:39 +0200
Subject: [PATCH 466/830] Add const to fix "cast discards qualifiers" warnings.

---
 libswscale/swscale_template.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 98de521cb1..3555883f0f 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -441,7 +441,7 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, int dstWidth,
 
     if (c->hScale16) {
         int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
-        c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
+        c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
     } else if (!c->hyscale_fast) {
         c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
     } else { // fast bilinear upscale / crap downscale
@@ -486,8 +486,8 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int
 
     if (c->hScale16) {
         int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
-        c->hScale16(dst1, dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
-        c->hScale16(dst2, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
+        c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
+        c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
     } else if (!c->hcscale_fast) {
         c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
         c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);

From 3d0424f2ff3ae774d4237954186e4113976827e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Mon, 30 May 2011 20:07:39 +0200
Subject: [PATCH 467/830] Add "const" to avoid "initialization discards
 qualifiers" warning.

---
 libswscale/swscale.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 76096e7593..e7e81d35cb 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1665,7 +1665,7 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
     uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
     int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
     for (i = 0; i < height; i++) {\
-        uint8_t *dither= dithers[src_depth-9][i&7];\
+        const uint8_t *dither= dithers[src_depth-9][i&7];\
         for (j = 0; j < length-7; j+=8){\
             dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
             dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\

From 6bb70dfd7466ff89259285d5714b5caa6ca954f9 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 30 May 2011 16:37:06 +0100
Subject: [PATCH 468/830] ARM: simplify inline asm with 64-bit operands

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/mathops.h     | 13 +++++--------
 libavutil/arm/intreadwrite.h | 17 ++++++++---------
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index 3a7a1f3ee9..dfa941161a 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -59,19 +59,16 @@ static inline av_const int MULH(int a, int b)
 
 static inline av_const int64_t MUL64(int a, int b)
 {
-    union { uint64_t x; unsigned hl[2]; } x;
-    __asm__ ("smull %0, %1, %2, %3"
-             : "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b));
-    return x.x;
+    int64_t x;
+    __asm__ ("smull %Q0, %R0, %1, %2" : "=r"(x) : "r"(a), "r"(b));
+    return x;
 }
 #define MUL64 MUL64
 
 static inline av_const int64_t MAC64(int64_t d, int a, int b)
 {
-    union { uint64_t x; unsigned hl[2]; } x = { d };
-    __asm__ ("smlal %0, %1, %2, %3"
-             : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b));
-    return x.x;
+    __asm__ ("smlal %Q0, %R0, %1, %2" : "+r"(d) : "r"(a), "r"(b));
+    return d;
 }
 #define MAC64(d, a, b) ((d) = MAC64(d, a, b))
 #define MLS64(d, a, b) MAC64(d, -(a), b)
diff --git a/libavutil/arm/intreadwrite.h b/libavutil/arm/intreadwrite.h
index a5ee14666a..613abe511c 100644
--- a/libavutil/arm/intreadwrite.h
+++ b/libavutil/arm/intreadwrite.h
@@ -55,22 +55,21 @@ static av_always_inline void AV_WN32(void *p, uint32_t v)
 #define AV_RN64 AV_RN64
 static av_always_inline uint64_t AV_RN64(const void *p)
 {
-    union { uint64_t v; uint32_t hl[2]; } v;
-    __asm__ ("ldr   %0, %2  \n\t"
-             "ldr   %1, %3  \n\t"
-             : "=&r"(v.hl[0]), "=r"(v.hl[1])
+    uint64_t v;
+    __asm__ ("ldr   %Q0, %1  \n\t"
+             "ldr   %R0, %2  \n\t"
+             : "=&r"(v)
              : "m"(*(const uint32_t*)p), "m"(*((const uint32_t*)p+1)));
-    return v.v;
+    return v;
 }
 
 #define AV_WN64 AV_WN64
 static av_always_inline void AV_WN64(void *p, uint64_t v)
 {
-    union { uint64_t v; uint32_t hl[2]; } vv = { v };
-    __asm__ ("str  %2, %0  \n\t"
-             "str  %3, %1  \n\t"
+    __asm__ ("str  %Q2, %0  \n\t"
+             "str  %R2, %1  \n\t"
              : "=m"(*(uint32_t*)p), "=m"(*((uint32_t*)p+1))
-             : "r"(vv.hl[0]), "r"(vv.hl[1]));
+             : "r"(v));
 }
 
 #endif /* HAVE_INLINE_ASM */

From f635a233e377bedc6a39c9d8923ee3039fa5319f Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 30 May 2011 20:31:27 +0200
Subject: [PATCH 469/830] swscale: Remove unused variable.

---
 libswscale/ppc/swscale_template.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
index ca6777144d..3ee100a57c 100644
--- a/libswscale/ppc/swscale_template.c
+++ b/libswscale/ppc/swscale_template.c
@@ -65,8 +65,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
 
 static void RENAME(sws_init_swScale)(SwsContext *c)
 {
-    enum PixelFormat srcFormat = c->srcFormat;
-
     c->yuv2yuvX     = RENAME(yuv2yuvX    );
     c->yuv2packedX  = RENAME(yuv2packedX );
 }

From a54dceb26af072d049c3f3f003f2a4ab0df26987 Mon Sep 17 00:00:00 2001
From: Alexander Strange <astrange@ithinksw.com>
Date: Tue, 31 May 2011 03:10:35 +0200
Subject: [PATCH 470/830]     Merge remote-tracking branch 'ffmpeg-mt/master'

    * ffmpeg-mt/master:
      Update todo
      The maximum buffer size needs to be 33, not 32

    merged-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/pthread.c | 2 +-
 libavcodec/utils.c   | 2 +-
 mt-work/todo.txt     | 5 +++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c
index 6d4ee549cb..4b68bce371 100644
--- a/libavcodec/pthread.c
+++ b/libavcodec/pthread.c
@@ -55,7 +55,7 @@ typedef struct ThreadContext {
 } ThreadContext;
 
 /// Max number of frame buffers that can be allocated when using frame threads.
-#define MAX_BUFFERS 32
+#define MAX_BUFFERS 33
 
 /**
  * Context used by codec threads and stored in their AVCodecContext thread_opaque.
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 95f41f315a..1c61e68910 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -115,7 +115,7 @@ typedef struct InternalBuffer{
     enum PixelFormat pix_fmt;
 }InternalBuffer;
 
-#define INTERNAL_BUFFER_SIZE 32
+#define INTERNAL_BUFFER_SIZE 33
 
 void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int linesize_align[4]){
     int w_align= 1;
diff --git a/mt-work/todo.txt b/mt-work/todo.txt
index 678d213d8c..861013da87 100644
--- a/mt-work/todo.txt
+++ b/mt-work/todo.txt
@@ -23,8 +23,9 @@ field pictures in the same packet are not optimal. Modify the
 nals_needed check so that the second field's first slice is
 considered as needed, then uncomment the FIXME code in decode_postinit.
 Ex: http://astrange.ithinksw.net/ffmpeg/mt-samples/PAFF-Chalet-Tire.mp4
-- The conformance sample MR3_TANDBERG_B.264 has problems (allocated picture overflow).
-- One 10-bit sample has problems.
+- The code added to shorten frame gaps (to avoid allocating more than 16 new frames)
+appears to be wrong by inspection. It does not handle prev_frame_num > frame_num,
+and "h->frame_num - h->sps.ref_frame_count - 1" should be "h->frame_num - h->sps.ref_frame_count".
 
 mpeg4:
 - Packed B-frames need to be explicitly split up

From a52f598d6301eddc333002c0b2a5e9cb5dda1cf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Mon, 30 May 2011 23:44:54 +0200
Subject: [PATCH 471/830] Port libmpcodecs fixes from MPlayer.

---
 libavfilter/libmpcodecs/vf_detc.c  |  2 +-
 libavfilter/libmpcodecs/vf_dint.c  | 12 ++++++------
 libavfilter/libmpcodecs/vf_divtc.c |  6 +++---
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/libavfilter/libmpcodecs/vf_detc.c b/libavfilter/libmpcodecs/vf_detc.c
index 8dd51da794..28d20e09b7 100644
--- a/libavfilter/libmpcodecs/vf_detc.c
+++ b/libavfilter/libmpcodecs/vf_detc.c
@@ -382,7 +382,7 @@ static void uninit(struct vf_instance *vf)
 }
 
 static struct {
-        char *name;
+        const char *name;
         int (*func)(struct vf_priv_s *p, mp_image_t *new, mp_image_t *old);
         int needread;
 } anal_funcs[] = {
diff --git a/libavfilter/libmpcodecs/vf_dint.c b/libavfilter/libmpcodecs/vf_dint.c
index 7038381221..ac5bf54a54 100644
--- a/libavfilter/libmpcodecs/vf_dint.c
+++ b/libavfilter/libmpcodecs/vf_dint.c
@@ -32,7 +32,7 @@ struct vf_priv_s {
   float sense; // first parameter
   float level; // second parameter
   unsigned int imgfmt;
-  char diff;
+  int diff;
   uint32_t max;
 //  int dfr;
 //  int rdfr;
@@ -73,7 +73,7 @@ static int config (struct vf_instance *vf,
       vf->priv->diff = 31;
     mp_msg (MSGT_VFILTER, MSGL_INFO, "Drop-interlaced: %dx%d diff %d / level %u\n",
            vf->priv->pmpi->width, vf->priv->pmpi->height,
-           (int)vf->priv->diff, (unsigned int)vf->priv->max);
+           vf->priv->diff, (unsigned int)vf->priv->max);
 //    vf->priv->rdfr = vf->priv->dfr = 0;
     vf->priv->was_dint = 0;
     return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);
@@ -81,10 +81,10 @@ static int config (struct vf_instance *vf,
 
 static int put_image (struct vf_instance *vf, mp_image_t *mpi, double pts)
 {
-    char rrow0[MAXROWSIZE];
-    char rrow1[MAXROWSIZE];
-    char rrow2[MAXROWSIZE];
-    char *row0 = rrow0, *row1 = rrow1, *row2 = rrow2/*, *row3 = rrow3*/;
+    int8_t rrow0[MAXROWSIZE];
+    int8_t rrow1[MAXROWSIZE];
+    int8_t rrow2[MAXROWSIZE];
+    int8_t *row0 = rrow0, *row1 = rrow1, *row2 = rrow2/*, *row3 = rrow3*/;
     int rowsize = mpi->width;
     uint32_t nok = 0, max = vf->priv->max;
     int diff = vf->priv->diff;
diff --git a/libavfilter/libmpcodecs/vf_divtc.c b/libavfilter/libmpcodecs/vf_divtc.c
index 25447f0596..3ead47290d 100644
--- a/libavfilter/libmpcodecs/vf_divtc.c
+++ b/libavfilter/libmpcodecs/vf_divtc.c
@@ -42,7 +42,7 @@ struct vf_priv_s
       ocount, sum[5];
    double threshold;
    FILE *file;
-   char *bdata;
+   int8_t *bdata;
    unsigned int *csdata;
    int *history;
    };
@@ -384,8 +384,8 @@ static int analyze(struct vf_priv_s *p)
    {
    int *buf=0, *bp, bufsize=0, n, b, f, i, j, m, s;
    unsigned int *cbuf=0, *cp;
-   char *pbuf;
-   char lbuf[256];
+   int8_t *pbuf;
+   int8_t lbuf[256];
    int sum[5];
    double d;
 

From 9ebcf7699bced12d4b7e326cfbb1f9ffb59ec794 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 29 May 2011 18:53:42 -0400
Subject: [PATCH 472/830] vp8: fix segmentation race during frame-threading.

Fixes occasional failure of make fate-vp8-test-vector-010 with
frame-multithreading enabled.
---
 libavcodec/vp8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 5500706494..282d2fdb4e 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -1612,7 +1612,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
 
         s->mv_min.x = -MARGIN;
         s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
-        if (prev_frame && s->segmentation.enabled && s->segmentation.update_map)
+        if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
             ff_thread_await_progress(prev_frame, mb_y, 0);
 
         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {

From 6f1ec38ce2193d3d4cacd87edb452c6d7ba751ec Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 13:36:21 +0100
Subject: [PATCH 473/830] mpegaudio: clean up compute_antialias() definition

This merges the float and fixed-point versions of the compute_antialias
function, fixes invalid array indexing, and eliminates a dead copy of
csa_table.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudiodec.c       | 66 ++++++++++++++++++---------------
 libavcodec/mpegaudiodec_float.c | 39 -------------------
 2 files changed, 36 insertions(+), 69 deletions(-)

diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index ccc93ad78a..b9d705bcc7 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -112,8 +112,6 @@ typedef struct MPADecodeContext {
 #include "mpegaudiodata.h"
 #include "mpegaudiodectab.h"
 
-static void RENAME(compute_antialias)(MPADecodeContext *s, GranuleDef *g);
-
 /* vlc structure for decoding layer 3 huffman tables */
 static VLC huff_vlc[16];
 static VLC_TYPE huff_vlc_tables[
@@ -135,8 +133,7 @@ static uint16_t band_index_long[9][23];
 /* intensity stereo coef table */
 static INTFLOAT is_table[2][16];
 static INTFLOAT is_table_lsf[2][2][16];
-static int32_t csa_table[8][4];
-static float csa_table_float[8][4];
+static INTFLOAT csa_table[8][4];
 static INTFLOAT mdct_win[8][36];
 
 static int16_t division_tab3[1<<6 ];
@@ -441,14 +438,17 @@ static av_cold int decode_init(AVCodecContext * avctx)
             ci = ci_table[i];
             cs = 1.0 / sqrt(1.0 + ci * ci);
             ca = cs * ci;
+#if !CONFIG_FLOAT
             csa_table[i][0] = FIXHR(cs/4);
             csa_table[i][1] = FIXHR(ca/4);
             csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
             csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
-            csa_table_float[i][0] = cs;
-            csa_table_float[i][1] = ca;
-            csa_table_float[i][2] = ca + cs;
-            csa_table_float[i][3] = ca - cs;
+#else
+            csa_table[i][0] = cs;
+            csa_table[i][1] = ca;
+            csa_table[i][2] = ca + cs;
+            csa_table[i][3] = ca - cs;
+#endif
         }
 
         /* compute mdct windows */
@@ -1335,10 +1335,26 @@ static void compute_stereo(MPADecodeContext *s,
     }
 }
 
-#if !CONFIG_FLOAT
-static void compute_antialias_fixed(MPADecodeContext *s, GranuleDef *g)
+#if CONFIG_FLOAT
+#define AA(j) do {                                                      \
+        float tmp0 = ptr[-1-j];                                         \
+        float tmp1 = ptr[   j];                                         \
+        ptr[-1-j] = tmp0 * csa_table[j][0] - tmp1 * csa_table[j][1];    \
+        ptr[   j] = tmp0 * csa_table[j][1] + tmp1 * csa_table[j][0];    \
+    } while (0)
+#else
+#define AA(j) do {                                              \
+        int tmp0 = ptr[-1-j];                                   \
+        int tmp1 = ptr[   j];                                   \
+        int tmp2 = MULH(tmp0 + tmp1, csa_table[j][0]);          \
+        ptr[-1-j] = 4*(tmp2 - MULH(tmp1, csa_table[j][2]));     \
+        ptr[   j] = 4*(tmp2 + MULH(tmp0, csa_table[j][3]));     \
+    } while (0)
+#endif
+
+static void compute_antialias(MPADecodeContext *s, GranuleDef *g)
 {
-    int32_t *ptr, *csa;
+    INTFLOAT *ptr;
     int n, i;
 
     /* we antialias only "long" bands */
@@ -1353,28 +1369,18 @@ static void compute_antialias_fixed(MPADecodeContext *s, GranuleDef *g)
 
     ptr = g->sb_hybrid + 18;
     for(i = n;i > 0;i--) {
-        int tmp0, tmp1, tmp2;
-        csa = &csa_table[0][0];
-#define INT_AA(j) \
-            tmp0 = ptr[-1-j];\
-            tmp1 = ptr[   j];\
-            tmp2= MULH(tmp0 + tmp1, csa[0+4*j]);\
-            ptr[-1-j] = 4*(tmp2 - MULH(tmp1, csa[2+4*j]));\
-            ptr[   j] = 4*(tmp2 + MULH(tmp0, csa[3+4*j]));
-
-        INT_AA(0)
-        INT_AA(1)
-        INT_AA(2)
-        INT_AA(3)
-        INT_AA(4)
-        INT_AA(5)
-        INT_AA(6)
-        INT_AA(7)
+        AA(0);
+        AA(1);
+        AA(2);
+        AA(3);
+        AA(4);
+        AA(5);
+        AA(6);
+        AA(7);
 
         ptr += 18;
     }
 }
-#endif
 
 static void compute_imdct(MPADecodeContext *s,
                           GranuleDef *g,
@@ -1703,7 +1709,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
             g = &s->granules[ch][gr];
 
             reorder_block(s, g);
-            RENAME(compute_antialias)(s, g);
+            compute_antialias(s, g);
             compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
         }
     } /* gr */
diff --git a/libavcodec/mpegaudiodec_float.c b/libavcodec/mpegaudiodec_float.c
index 94463a824e..0ff866af31 100644
--- a/libavcodec/mpegaudiodec_float.c
+++ b/libavcodec/mpegaudiodec_float.c
@@ -22,45 +22,6 @@
 #define CONFIG_FLOAT 1
 #include "mpegaudiodec.c"
 
-static void compute_antialias_float(MPADecodeContext *s,
-                              GranuleDef *g)
-{
-    float *ptr;
-    int n, i;
-
-    /* we antialias only "long" bands */
-    if (g->block_type == 2) {
-        if (!g->switch_point)
-            return;
-        /* XXX: check this for 8000Hz case */
-        n = 1;
-    } else {
-        n = SBLIMIT - 1;
-    }
-
-    ptr = g->sb_hybrid + 18;
-    for(i = n;i > 0;i--) {
-        float tmp0, tmp1;
-        float *csa = &csa_table_float[0][0];
-#define FLOAT_AA(j)\
-        tmp0= ptr[-1-j];\
-        tmp1= ptr[   j];\
-        ptr[-1-j] = tmp0 * csa[0+4*j] - tmp1 * csa[1+4*j];\
-        ptr[   j] = tmp0 * csa[1+4*j] + tmp1 * csa[0+4*j];
-
-        FLOAT_AA(0)
-        FLOAT_AA(1)
-        FLOAT_AA(2)
-        FLOAT_AA(3)
-        FLOAT_AA(4)
-        FLOAT_AA(5)
-        FLOAT_AA(6)
-        FLOAT_AA(7)
-
-        ptr += 18;
-    }
-}
-
 #if CONFIG_MP1FLOAT_DECODER
 AVCodec ff_mp1float_decoder =
 {

From c16919487ec4acc861401d0e2ad2bbdb5cb251d1 Mon Sep 17 00:00:00 2001
From: Gil Pedersen <gil@cmi.aau.dk>
Date: Mon, 2 May 2011 19:25:28 +0200
Subject: [PATCH 474/830] improved 'edts' atom writing support

The 'edts' write function can now generate an initial empty edit resulting in a track-specific presentation delay.
This is automatically calculated and inserted for any track where the initial DTS != 0.
Added support for long (version==1) timecodes.
---
 libavformat/movenc.c | 49 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 2b6539d140..67d39f4aef 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1279,20 +1279,49 @@ static int mov_write_tapt_tag(AVIOContext *pb, MOVTrack *track)
 // This box seems important for the psp playback ... without it the movie seems to hang
 static int mov_write_edts_tag(AVIOContext *pb, MOVTrack *track)
 {
-    avio_wb32(pb, 0x24); /* size  */
+    int64_t duration = av_rescale_rnd(track->trackDuration, MOV_TIMESCALE,
+                                      track->timescale, AV_ROUND_UP);
+    int version = duration < INT32_MAX ? 0 : 1;
+    int entry_size, entry_count, size;
+    int64_t delay, start_ct = track->cluster[0].cts;
+    delay = av_rescale_rnd(track->cluster[0].dts + start_ct, MOV_TIMESCALE,
+                           track->timescale, AV_ROUND_DOWN);
+    version |= delay < INT32_MAX ? 0 : 1;
+
+    entry_size = (version == 1) ? 20 : 12;
+    entry_count = 1 + (delay > 0);
+    size = 24 + entry_count * entry_size;
+
+    /* write the atom data */
+    avio_wb32(pb, size);
     ffio_wfourcc(pb, "edts");
-    avio_wb32(pb, 0x1c); /* size  */
+    avio_wb32(pb, size - 8);
     ffio_wfourcc(pb, "elst");
-    avio_wb32(pb, 0x0);
-    avio_wb32(pb, 0x1);
+    avio_w8(pb, version);
+    avio_wb24(pb, 0); /* flags */
 
-    /* duration   ... doesn't seem to effect psp */
-    avio_wb32(pb, av_rescale_rnd(track->trackDuration, MOV_TIMESCALE,
-                                track->timescale, AV_ROUND_UP));
+    avio_wb32(pb, entry_count);
+    if (delay > 0) { /* add an empty edit to delay presentation */
+        if (version == 1) {
+            avio_wb64(pb, delay);
+            avio_wb64(pb, -1);
+        } else {
+            avio_wb32(pb, delay);
+            avio_wb32(pb, -1);
+        }
+        avio_wb32(pb, 0x00010000);
+    }
 
-    avio_wb32(pb, track->cluster[0].cts); /* first pts is cts since dts is 0 */
+    /* duration */
+    if (version == 1) {
+        avio_wb64(pb, duration);
+        avio_wb64(pb, start_ct);
+    } else {
+        avio_wb32(pb, duration);
+        avio_wb32(pb, start_ct);
+    }
     avio_wb32(pb, 0x00010000);
-    return 0x24;
+    return size;
 }
 
 static int mov_write_tref_tag(AVIOContext *pb, MOVTrack *track)
@@ -1349,7 +1378,7 @@ static int mov_write_trak_tag(AVIOContext *pb, MOVTrack *track, AVStream *st)
     avio_wb32(pb, 0); /* size */
     ffio_wfourcc(pb, "trak");
     mov_write_tkhd_tag(pb, track, st);
-    if (track->mode == MODE_PSP || track->flags & MOV_TRACK_CTTS)
+    if (track->mode == MODE_PSP || track->flags & MOV_TRACK_CTTS || track->cluster[0].dts)
         mov_write_edts_tag(pb, track);  // PSP Movies require edts box
     if (track->tref_tag)
         mov_write_tref_tag(pb, track);

From 84fb4e9df737c856cca7767016110fc74bee56cb Mon Sep 17 00:00:00 2001
From: Piotr Kaczuba <p.kaczuba@attika.ath.cx>
Date: Mon, 30 May 2011 13:19:35 +0200
Subject: [PATCH 475/830] postprocess.c: filter name needs to be double 0
 terminated

---
 libpostproc/postprocess.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index b2c35f537e..bfb96e1a8f 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -86,6 +86,7 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
 //#define DEBUG_BRIGHTNESS
 #include "postprocess.h"
 #include "postprocess_internal.h"
+#include "libavutil/avstring.h"
 
 unsigned postproc_version(void)
 {
@@ -766,8 +767,8 @@ pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
     ppMode->maxClippedThreshold= 0.01;
     ppMode->error=0;
 
-#undef strncpy
-    strncpy(temp, name, GET_MODE_BUFFER_SIZE);
+    memset(temp, 0, GET_MODE_BUFFER_SIZE);
+    av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
 
     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 
@@ -823,7 +824,7 @@ pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
 
                 plen= strlen(p);
                 spaceLeft= p - temp + plen;
-                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
+                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
                     ppMode->error++;
                     break;
                 }

From fdf18e33bb07d665a86b344e65043692a3de51bb Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 31 May 2011 18:38:01 +0100
Subject: [PATCH 476/830] mpegaudiodec: remove unused code and variables

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudiodec.c | 30 ++++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index b9d705bcc7..6910d1fa4c 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -269,27 +269,6 @@ static inline int l3_unscale(int value, int exponent)
     return m;
 }
 
-/* all integer n^(4/3) computation code */
-#define DEV_ORDER 13
-
-#define POW_FRAC_BITS 24
-#define POW_FRAC_ONE    (1 << POW_FRAC_BITS)
-#define POW_FIX(a)   ((int)((a) * POW_FRAC_ONE))
-#define POW_MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> POW_FRAC_BITS)
-
-static int dev_4_3_coefs[DEV_ORDER];
-
-static av_cold void int_pow_init(void)
-{
-    int i, a;
-
-    a = POW_FIX(1.0);
-    for(i=0;i<DEV_ORDER;i++) {
-        a = POW_MULL(a, POW_FIX(4.0 / 3.0) - i * POW_FIX(1.0)) / (i + 1);
-        dev_4_3_coefs[i] = a;
-    }
-}
-
 static av_cold int decode_init(AVCodecContext * avctx)
 {
     MPADecodeContext *s = avctx->priv_data;
@@ -385,7 +364,6 @@ static av_cold int decode_init(AVCodecContext * avctx)
 
         /* compute n ^ (4/3) and store it in mantissa/exp format */
 
-        int_pow_init();
         mpegaudio_tableinit();
 
         for (i = 0; i < 4; i++)
@@ -1476,7 +1454,7 @@ static void compute_imdct(MPADecodeContext *s,
 /* main layer3 decoding function */
 static int mp_decode_layer3(MPADecodeContext *s)
 {
-    int nb_granules, main_data_begin, private_bits;
+    int nb_granules, main_data_begin;
     int gr, ch, blocksplit_flag, i, j, k, n, bits_pos;
     GranuleDef *g;
     int16_t exponents[576]; //FIXME try INTFLOAT
@@ -1484,14 +1462,14 @@ static int mp_decode_layer3(MPADecodeContext *s)
     /* read side info */
     if (s->lsf) {
         main_data_begin = get_bits(&s->gb, 8);
-        private_bits = get_bits(&s->gb, s->nb_channels);
+        skip_bits(&s->gb, s->nb_channels);
         nb_granules = 1;
     } else {
         main_data_begin = get_bits(&s->gb, 9);
         if (s->nb_channels == 2)
-            private_bits = get_bits(&s->gb, 3);
+            skip_bits(&s->gb, 3);
         else
-            private_bits = get_bits(&s->gb, 5);
+            skip_bits(&s->gb, 5);
         nb_granules = 2;
         for(ch=0;ch<s->nb_channels;ch++) {
             s->granules[ch][0].scfsi = 0;/* all scale factors are transmitted */

From 5ac4952a5808cc8cfe01031ca9d4df7d072f453c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 31 May 2011 14:00:12 +0200
Subject: [PATCH 477/830] vf_drawtext: Replace FFmpeg by Libav in license
 boilerplate.

---
 libavfilter/vf_drawtext.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
index b26029bb8f..8b28be9d9c 100644
--- a/libavfilter/vf_drawtext.c
+++ b/libavfilter/vf_drawtext.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2010 S.N. Hemanth Meenakshisundaram
  * Copyright (c) 2003 Gustavo Sverzut Barbieri <gsbarbieri@yahoo.com.br>
  *
- * This file is part of FFmpeg.
+ * This file is part of Libav.
  *
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 

From 0abbd3adb6a28c088e121bf3da6c04225398bee4 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Tue, 31 May 2011 13:48:49 -0700
Subject: [PATCH 478/830] doc: add libvpx encoder section

Documents the mapping from FFmpeg options to libvpx.
---
 doc/encoders.texi | 113 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index f21f8ffd82..7913ffe7c3 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -420,6 +420,119 @@ Selected by Encoder (default)
 A description of some of the currently available video encoders
 follows.
 
+@section libvpx
+
+VP8 format supported through libvpx.
+
+Requires the presence of the libvpx headers and library during configuration.
+You need to explicitly configure the build with @code{--enable-libvpx}.
+
+@subsection Options
+
+Mapping from FFmpeg to libvpx options with conversion notes in parentheses.
+
+@table @option
+
+@item threads
+g_threads
+
+@item profile
+g_profile
+
+@item vb
+rc_target_bitrate
+
+@item g
+kf_max_dist
+
+@item keyint_min
+kf_min_dist
+
+@item qmin
+rc_min_quantizer
+
+@item qmax
+rc_max_quantizer
+
+@item bufsize, vb
+rc_buf_sz
+@code{(bufsize * 1000 / vb)}
+
+rc_buf_optimal_sz
+@code{(bufsize * 1000 / vb * 5 / 6)}
+
+@item rc_init_occupancy, vb
+rc_buf_initial_sz
+@code{(rc_init_occupancy * 1000 / vb)}
+
+@item rc_buffer_aggressivity
+rc_undershoot_pct
+
+@item skip_threshold
+rc_dropframe_thresh
+
+@item qcomp
+rc_2pass_vbr_bias_pct
+
+@item maxrate, vb
+rc_2pass_vbr_maxsection_pct
+@code{(maxrate * 100 / vb)}
+
+@item minrate, vb
+rc_2pass_vbr_minsection_pct
+@code{(minrate * 100 / vb)}
+
+@item minrate, maxrate, vb
+@code{VPX_CBR}
+@code{(minrate == maxrate == vb)}
+
+@item crf
+@code{VPX_CQ}, @code{VP8E_SET_CQ_LEVEL}
+
+@item quality
+@table @option
+@item @var{best}
+@code{VPX_DL_BEST_QUALITY}
+@item @var{good}
+@code{VPX_DL_GOOD_QUALITY}
+@item @var{realtime}
+@code{VPX_DL_REALTIME}
+@end table
+
+@item speed
+@code{VP8E_SET_CPUUSED}
+
+@item nr
+@code{VP8E_SET_NOISE_SENSITIVITY}
+
+@item mb_threshold
+@code{VP8E_SET_STATIC_THRESHOLD}
+
+@item slices
+@code{VP8E_SET_TOKEN_PARTITIONS}
+
+@item Alternate reference frame related
+@table @option
+@item vp8flags altref
+@code{VP8E_SET_ENABLEAUTOALTREF}
+@item @var{arnr_max_frames}
+@code{VP8E_SET_ARNR_MAXFRAMES}
+@item @var{arnr_type}
+@code{VP8E_SET_ARNR_TYPE}
+@item @var{arnr_strength}
+@code{VP8E_SET_ARNR_STRENGTH}
+@item @var{rc_lookahead}
+g_lag_in_frames
+@end table
+
+@item vp8flags error_resilient
+g_error_resilient
+
+@end table
+
+For more information about libvpx see:
+@url{http://www.webmproject.org/}
+
 @section libx264
 
 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 format supported through

From c51695dbf6e05b397ad8ef8e89d27723db5cb9f1 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 31 May 2011 21:04:01 +0100
Subject: [PATCH 479/830] ARM: fix MUL64 inline asm for pre-armv6

Prior to ARMv6, the destination registers of the SMULL instruction
must be distinct from the first source register.  Marking the
output early-clobber ensures it is allocated unique registers.

This restriction is dropped in ARMv6 and later, so allowing overlap
between input and output registers there might give better code.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/mathops.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index dfa941161a..7c2acca2e8 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -41,6 +41,8 @@ static inline av_const int MULL(int a, int b, unsigned shift)
 }
 
 #define MULH MULH
+#define MUL64 MUL64
+
 #if HAVE_ARMV6
 static inline av_const int MULH(int a, int b)
 {
@@ -48,6 +50,13 @@ static inline av_const int MULH(int a, int b)
     __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
     return r;
 }
+
+static inline av_const int64_t MUL64(int a, int b) /* ARMv6+ variant, multiplies a and b with a single smull */
+{
+    int64_t x;
+    __asm__ ("smull %Q0, %R0, %1, %2" : "=r"(x) : "r"(a), "r"(b)); /* plain "=r": from ARMv6 on, smull outputs may overlap its inputs */
+    return x;
+}
 #else
 static inline av_const int MULH(int a, int b)
 {
@@ -55,15 +64,14 @@ static inline av_const int MULH(int a, int b)
     __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));
     return hi;
 }
-#endif
 
 static inline av_const int64_t MUL64(int a, int b)
 {
     int64_t x;
-    __asm__ ("smull %Q0, %R0, %1, %2" : "=r"(x) : "r"(a), "r"(b));
+    __asm__ ("smull %Q0, %R0, %1, %2" : "=&r"(x) : "r"(a), "r"(b));
     return x;
 }
-#define MUL64 MUL64
+#endif
 
 static inline av_const int64_t MAC64(int64_t d, int a, int b)
 {

From b0a4e5f9e765dc601a62f87cf3510ac381c3c4c6 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 31 May 2011 21:23:45 +0200
Subject: [PATCH 480/830] Employ correct printf format specifiers, mostly in
 debug output.

---
 libavfilter/vsrc_movie.c |  2 +-
 libavformat/ape.c        | 54 ++++++++++++++++++++++------------------
 libavformat/mxfdec.c     |  8 +++---
 libavformat/r3d.c        | 13 +++++-----
 4 files changed, 42 insertions(+), 35 deletions(-)

diff --git a/libavfilter/vsrc_movie.c b/libavfilter/vsrc_movie.c
index 5e15524e61..a26787d561 100644
--- a/libavfilter/vsrc_movie.c
+++ b/libavfilter/vsrc_movie.c
@@ -152,7 +152,7 @@ static int movie_init(AVFilterContext *ctx)
     movie->w = movie->codec_ctx->width;
     movie->h = movie->codec_ctx->height;
 
-    av_log(ctx, AV_LOG_INFO, "seek_point:%lld format_name:%s file_name:%s stream_index:%d\n",
+    av_log(ctx, AV_LOG_INFO, "seek_point:%"PRIi64" format_name:%s file_name:%s stream_index:%d\n",
            movie->seek_point, movie->format_name, movie->file_name,
            movie->stream_index);
 
diff --git a/libavformat/ape.c b/libavformat/ape.c
index 0bc7737fde..d7c1447b01 100644
--- a/libavformat/ape.c
+++ b/libavformat/ape.c
@@ -101,14 +101,14 @@ static void ape_dumpinfo(AVFormatContext * s, APEContext * ape_ctx)
 
     av_log(s, AV_LOG_DEBUG, "Descriptor Block:\n\n");
     av_log(s, AV_LOG_DEBUG, "magic                = \"%c%c%c%c\"\n", ape_ctx->magic[0], ape_ctx->magic[1], ape_ctx->magic[2], ape_ctx->magic[3]);
-    av_log(s, AV_LOG_DEBUG, "fileversion          = %d\n", ape_ctx->fileversion);
-    av_log(s, AV_LOG_DEBUG, "descriptorlength     = %d\n", ape_ctx->descriptorlength);
-    av_log(s, AV_LOG_DEBUG, "headerlength         = %d\n", ape_ctx->headerlength);
-    av_log(s, AV_LOG_DEBUG, "seektablelength      = %d\n", ape_ctx->seektablelength);
-    av_log(s, AV_LOG_DEBUG, "wavheaderlength      = %d\n", ape_ctx->wavheaderlength);
-    av_log(s, AV_LOG_DEBUG, "audiodatalength      = %d\n", ape_ctx->audiodatalength);
-    av_log(s, AV_LOG_DEBUG, "audiodatalength_high = %d\n", ape_ctx->audiodatalength_high);
-    av_log(s, AV_LOG_DEBUG, "wavtaillength        = %d\n", ape_ctx->wavtaillength);
+    av_log(s, AV_LOG_DEBUG, "fileversion          = %"PRId16"\n", ape_ctx->fileversion);
+    av_log(s, AV_LOG_DEBUG, "descriptorlength     = %"PRIu32"\n", ape_ctx->descriptorlength);
+    av_log(s, AV_LOG_DEBUG, "headerlength         = %"PRIu32"\n", ape_ctx->headerlength);
+    av_log(s, AV_LOG_DEBUG, "seektablelength      = %"PRIu32"\n", ape_ctx->seektablelength);
+    av_log(s, AV_LOG_DEBUG, "wavheaderlength      = %"PRIu32"\n", ape_ctx->wavheaderlength);
+    av_log(s, AV_LOG_DEBUG, "audiodatalength      = %"PRIu32"\n", ape_ctx->audiodatalength);
+    av_log(s, AV_LOG_DEBUG, "audiodatalength_high = %"PRIu32"\n", ape_ctx->audiodatalength_high);
+    av_log(s, AV_LOG_DEBUG, "wavtaillength        = %"PRIu32"\n", ape_ctx->wavtaillength);
     av_log(s, AV_LOG_DEBUG, "md5                  = ");
     for (i = 0; i < 16; i++)
          av_log(s, AV_LOG_DEBUG, "%02x", ape_ctx->md5[i]);
@@ -116,14 +116,14 @@ static void ape_dumpinfo(AVFormatContext * s, APEContext * ape_ctx)
 
     av_log(s, AV_LOG_DEBUG, "\nHeader Block:\n\n");
 
-    av_log(s, AV_LOG_DEBUG, "compressiontype      = %d\n", ape_ctx->compressiontype);
-    av_log(s, AV_LOG_DEBUG, "formatflags          = %d\n", ape_ctx->formatflags);
-    av_log(s, AV_LOG_DEBUG, "blocksperframe       = %d\n", ape_ctx->blocksperframe);
-    av_log(s, AV_LOG_DEBUG, "finalframeblocks     = %d\n", ape_ctx->finalframeblocks);
-    av_log(s, AV_LOG_DEBUG, "totalframes          = %d\n", ape_ctx->totalframes);
-    av_log(s, AV_LOG_DEBUG, "bps                  = %d\n", ape_ctx->bps);
-    av_log(s, AV_LOG_DEBUG, "channels             = %d\n", ape_ctx->channels);
-    av_log(s, AV_LOG_DEBUG, "samplerate           = %d\n", ape_ctx->samplerate);
+    av_log(s, AV_LOG_DEBUG, "compressiontype      = %"PRIu16"\n", ape_ctx->compressiontype);
+    av_log(s, AV_LOG_DEBUG, "formatflags          = %"PRIu16"\n", ape_ctx->formatflags);
+    av_log(s, AV_LOG_DEBUG, "blocksperframe       = %"PRIu32"\n", ape_ctx->blocksperframe);
+    av_log(s, AV_LOG_DEBUG, "finalframeblocks     = %"PRIu32"\n", ape_ctx->finalframeblocks);
+    av_log(s, AV_LOG_DEBUG, "totalframes          = %"PRIu32"\n", ape_ctx->totalframes);
+    av_log(s, AV_LOG_DEBUG, "bps                  = %"PRIu16"\n", ape_ctx->bps);
+    av_log(s, AV_LOG_DEBUG, "channels             = %"PRIu16"\n", ape_ctx->channels);
+    av_log(s, AV_LOG_DEBUG, "samplerate           = %"PRIu32"\n", ape_ctx->samplerate);
 
     av_log(s, AV_LOG_DEBUG, "\nSeektable\n\n");
     if ((ape_ctx->seektablelength / sizeof(uint32_t)) != ape_ctx->totalframes) {
@@ -140,12 +140,14 @@ static void ape_dumpinfo(AVFormatContext * s, APEContext * ape_ctx)
 
     av_log(s, AV_LOG_DEBUG, "\nFrames\n\n");
     for (i = 0; i < ape_ctx->totalframes; i++)
-        av_log(s, AV_LOG_DEBUG, "%8d   %8lld %8d (%d samples)\n", i, ape_ctx->frames[i].pos, ape_ctx->frames[i].size, ape_ctx->frames[i].nblocks);
+        av_log(s, AV_LOG_DEBUG, "%8d   %8"PRId64" %8d (%d samples)\n", i,
+               ape_ctx->frames[i].pos, ape_ctx->frames[i].size,
+               ape_ctx->frames[i].nblocks);
 
     av_log(s, AV_LOG_DEBUG, "\nCalculated information:\n\n");
-    av_log(s, AV_LOG_DEBUG, "junklength           = %d\n", ape_ctx->junklength);
-    av_log(s, AV_LOG_DEBUG, "firstframe           = %d\n", ape_ctx->firstframe);
-    av_log(s, AV_LOG_DEBUG, "totalsamples         = %d\n", ape_ctx->totalsamples);
+    av_log(s, AV_LOG_DEBUG, "junklength           = %"PRIu32"\n", ape_ctx->junklength);
+    av_log(s, AV_LOG_DEBUG, "firstframe           = %"PRIu32"\n", ape_ctx->firstframe);
+    av_log(s, AV_LOG_DEBUG, "totalsamples         = %"PRIu32"\n", ape_ctx->totalsamples);
 #endif
 }
 
@@ -169,7 +171,8 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap)
     ape->fileversion = avio_rl16(pb);
 
     if (ape->fileversion < APE_MIN_VERSION || ape->fileversion > APE_MAX_VERSION) {
-        av_log(s, AV_LOG_ERROR, "Unsupported file version - %d.%02d\n", ape->fileversion / 1000, (ape->fileversion % 1000) / 10);
+        av_log(s, AV_LOG_ERROR, "Unsupported file version - %"PRId16".%02"PRId16"\n",
+               ape->fileversion / 1000, (ape->fileversion % 1000) / 10);
         return -1;
     }
 
@@ -247,11 +250,12 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap)
         return AVERROR(EINVAL);
     }
     if(ape->totalframes > UINT_MAX / sizeof(APEFrame)){
-        av_log(s, AV_LOG_ERROR, "Too many frames: %d\n", ape->totalframes);
+        av_log(s, AV_LOG_ERROR, "Too many frames: %"PRIu32"\n",
+               ape->totalframes);
         return -1;
     }
     if (ape->seektablelength && (ape->seektablelength / sizeof(*ape->seektable)) < ape->totalframes) {
-        av_log(s, AV_LOG_ERROR, "Number of seek entries is less than number of frames: %d vs. %d\n",
+        av_log(s, AV_LOG_ERROR, "Number of seek entries is less than number of frames: %zu vs. %"PRIu32"\n", /* first value is a size_t quotient */
                ape->seektablelength / sizeof(*ape->seektable), ape->totalframes);
         return AVERROR_INVALIDDATA;
     }
@@ -301,7 +305,9 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap)
         avio_seek(pb, 0, SEEK_SET);
     }
 
-    av_log(s, AV_LOG_DEBUG, "Decoding file - v%d.%02d, compression level %d\n", ape->fileversion / 1000, (ape->fileversion % 1000) / 10, ape->compressiontype);
+    av_log(s, AV_LOG_DEBUG, "Decoding file - v%d.%02d, compression level %"PRIu16"\n",
+           ape->fileversion / 1000, (ape->fileversion % 1000) / 10,
+           ape->compressiontype);
 
     /* now we are ready: build format streams */
     st = av_new_stream(s, 0);
diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
index 39c7feada0..82daa2a002 100644
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c
@@ -309,7 +309,7 @@ static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
         if (klv_read_packet(&klv, s->pb) < 0)
             return -1;
         PRINT_KEY(s, "read packet", klv.key);
-        av_dlog(s, "size %lld offset %#llx\n", klv.length, klv.offset);
+        av_dlog(s, "size %"PRIu64" offset %#"PRIx64"\n", klv.length, klv.offset);
         if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key)) {
             int res = mxf_decrypt_triplet(s, pkt, &klv);
             if (res < 0) {
@@ -522,8 +522,8 @@ static int mxf_read_index_table_segment(void *arg, AVIOContext *pb, int tag, int
     case 0x3F06: av_dlog(NULL, "IndexSID %d\n", avio_rb32(pb)); break;
     case 0x3F07: av_dlog(NULL, "BodySID %d\n", avio_rb32(pb)); break;
     case 0x3F0B: av_dlog(NULL, "IndexEditRate %d/%d\n", avio_rb32(pb), avio_rb32(pb)); break;
-    case 0x3F0C: av_dlog(NULL, "IndexStartPosition %lld\n", avio_rb64(pb)); break;
-    case 0x3F0D: av_dlog(NULL, "IndexDuration %lld\n", avio_rb64(pb)); break;
+    case 0x3F0C: av_dlog(NULL, "IndexStartPosition %"PRIu64"\n", avio_rb64(pb)); break;
+    case 0x3F0D: av_dlog(NULL, "IndexDuration %"PRIu64"\n", avio_rb64(pb)); break;
     }
     return 0;
 }
@@ -920,7 +920,7 @@ static int mxf_read_header(AVFormatContext *s, AVFormatParameters *ap)
         if (klv_read_packet(&klv, s->pb) < 0)
             return -1;
         PRINT_KEY(s, "read header", klv.key);
-        av_dlog(s, "size %lld offset %#llx\n", klv.length, klv.offset);
+        av_dlog(s, "size %"PRIu64" offset %#"PRIx64"\n", klv.length, klv.offset);
         if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key) ||
             IS_KLV_KEY(klv.key, mxf_essence_element_key)) {
             /* FIXME avoid seek */
diff --git a/libavformat/r3d.c b/libavformat/r3d.c
index e815fda930..1b5dc1972a 100644
--- a/libavformat/r3d.c
+++ b/libavformat/r3d.c
@@ -43,7 +43,7 @@ static int read_atom(AVFormatContext *s, Atom *atom)
     if (atom->size < 8)
         return -1;
     atom->tag = avio_rl32(s->pb);
-    av_dlog(s, "atom %d %.4s offset %#llx\n",
+    av_dlog(s, "atom %u %.4s offset %#"PRIx64"\n",
             atom->size, (char*)&atom->tag, atom->offset);
     return atom->size;
 }
@@ -131,7 +131,7 @@ static int r3d_read_rdvo(AVFormatContext *s, Atom *atom)
     if (st->codec->time_base.den)
         st->duration = (uint64_t)r3d->video_offsets_count*
             st->time_base.den*st->codec->time_base.num/st->codec->time_base.den;
-    av_dlog(s, "duration %lld\n", st->duration);
+    av_dlog(s, "duration %"PRId64"\n", st->duration);
 
     return 0;
 }
@@ -176,7 +176,7 @@ static int r3d_read_header(AVFormatContext *s, AVFormatParameters *ap)
     }
 
     s->data_offset = avio_tell(s->pb);
-    av_dlog(s, "data offset %#llx\n", s->data_offset);
+    av_dlog(s, "data offset %#"PRIx64"\n", s->data_offset);
     if (!s->pb->seekable)
         return 0;
     // find REOB/REOF/REOS to load index
@@ -255,7 +255,7 @@ static int r3d_read_redv(AVFormatContext *s, AVPacket *pkt, Atom *atom)
     if (st->codec->time_base.den)
         pkt->duration = (uint64_t)st->time_base.den*
             st->codec->time_base.num/st->codec->time_base.den;
-    av_dlog(s, "pkt dts %lld duration %d\n", pkt->dts, pkt->duration);
+    av_dlog(s, "pkt dts %"PRId64" duration %d\n", pkt->dts, pkt->duration);
 
     return 0;
 }
@@ -299,7 +299,7 @@ static int r3d_read_reda(AVFormatContext *s, AVPacket *pkt, Atom *atom)
     pkt->stream_index = 1;
     pkt->dts = dts;
     pkt->duration = av_rescale(samples, st->time_base.den, st->codec->sample_rate);
-    av_dlog(s, "pkt dts %lld duration %d samples %d sample rate %d\n",
+    av_dlog(s, "pkt dts %"PRId64" duration %d samples %d sample rate %d\n",
             pkt->dts, pkt->duration, samples, st->codec->sample_rate);
 
     return 0;
@@ -356,7 +356,8 @@ static int r3d_seek(AVFormatContext *s, int stream_index, int64_t sample_time, i
 
     frame_num = sample_time*st->codec->time_base.den/
         ((int64_t)st->codec->time_base.num*st->time_base.den);
-    av_dlog(s, "seek frame num %d timestamp %lld\n", frame_num, sample_time);
+    av_dlog(s, "seek frame num %d timestamp %"PRId64"\n",
+            frame_num, sample_time);
 
     if (frame_num < r3d->video_offsets_count) {
         avio_seek(s->pb, r3d->video_offsets_count, SEEK_SET);

From 7e985c9e35f2be426039da9a1696a584ebd57ad0 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 31 May 2011 21:24:13 +0200
Subject: [PATCH 481/830] mpegaudioenc: Fix broken av_dlog statement.

---
 libavcodec/mpegaudioenc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/mpegaudioenc.c b/libavcodec/mpegaudioenc.c
index 50876ec2a4..ef265c905d 100644
--- a/libavcodec/mpegaudioenc.c
+++ b/libavcodec/mpegaudioenc.c
@@ -544,11 +544,11 @@ static void compute_bit_allocation(MpegAudioContext *s,
                 }
             }
         }
-        av_dlog(NULL, "current=%d max=%d max_sb=%d alloc=%d\n",
-                current_frame_size, max_frame_size, max_sb,
-                bit_alloc[max_sb]);
         if (max_sb < 0)
             break;
+        av_dlog(NULL, "current=%d max=%d max_sb=%d max_ch=%d alloc=%d\n",
+                current_frame_size, max_frame_size, max_sb, max_ch,
+                bit_alloc[max_ch][max_sb]);
 
         /* find alloc table entry (XXX: not optimal, should use
            pointer table) */

From 836f47d34b49e8ba9883e738a42f154130421caa Mon Sep 17 00:00:00 2001
From: Daniel Kang <daniel.d.kang@gmail.com>
Date: Tue, 24 May 2011 15:14:38 -0400
Subject: [PATCH 482/830] Add IDCT functions for 10-bit H.264.

Ports the majority of IDCT functions for 10-bit H.264.

Parts are inspired by the 8-bit IDCT code in Libav; other parts are ported from x264 with relicensing permission from the author.

Signed-off-by: Ronald S. Bultje <rbultje@google.com>
---
 libavcodec/x86/Makefile            |   3 +-
 libavcodec/x86/h264_idct_10bit.asm | 570 +++++++++++++++++++++++++++++
 libavcodec/x86/h264dsp_mmx.c       |  59 +++
 3 files changed, 631 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/x86/h264_idct_10bit.asm

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index ba664abb1e..38b736e5e7 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -12,8 +12,9 @@ YASM-OBJS-$(CONFIG_FFT)                += x86/fft_mmx.o                 \
 MMX-OBJS-$(CONFIG_H264DSP)             += x86/h264dsp_mmx.o
 YASM-OBJS-$(CONFIG_H264DSP)            += x86/h264_deblock.o            \
                                           x86/h264_deblock_10bit.o      \
-                                          x86/h264_weight.o             \
                                           x86/h264_idct.o               \
+                                          x86/h264_idct_10bit.o         \
+                                          x86/h264_weight.o             \
 
 YASM-OBJS-$(CONFIG_H264PRED)           += x86/h264_intrapred.o
 MMX-OBJS-$(CONFIG_H264PRED)            += x86/h264_intrapred_init.o
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
new file mode 100644
index 0000000000..3d0004e09e
--- /dev/null
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -0,0 +1,570 @@
+;*****************************************************************************
+;* MMX/SSE2/AVX-optimized 10-bit H.264 iDCT code
+;*****************************************************************************
+;* Copyright (C) 2005-2011 x264 project
+;*
+;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA
+
+pw_pixel_max: times 8 dw ((1 << 10)-1)
+pd_32:        times 4 dd 32
+scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
+           db 6+1*8, 7+1*8, 6+2*8, 7+2*8
+           db 4+3*8, 5+3*8, 4+4*8, 5+4*8
+           db 6+3*8, 7+3*8, 6+4*8, 7+4*8
+           db 1+1*8, 2+1*8
+           db 1+2*8, 2+2*8
+           db 1+4*8, 2+4*8
+           db 1+5*8, 2+5*8
+
+%ifdef PIC
+%define scan8 r11
+%else
+%define scan8 scan8_mem
+%endif
+
+SECTION .text
+
+;-----------------------------------------------------------------------------
+; void h264_idct_add(pixel *dst, dctcoef *block, int stride)
+;-----------------------------------------------------------------------------
+%macro STORE_DIFFx2 6 ; %1,%2: dword inputs (both modified, result in %1), %3: scratch, %4: second CLIPW operand (callers pass a zeroed register), %5: dst pointer, %6: stride
+    psrad       %1, 6                  ; arithmetic >>6 on both dword inputs
+    psrad       %2, 6
+    packssdw    %1, %2                 ; pack both registers into words in %1 with signed saturation
+    movq        %3, [%5]               ; low half of %3  <- 8 bytes at dst
+    movhps      %3, [%5+%6]            ; high half of %3 <- 8 bytes at dst+stride
+    paddsw      %1, %3                 ; add the packed values to the loaded pixels (saturating)
+    CLIPW       %1, %4, [pw_pixel_max] ; presumably clamps each word to [%4, pw_pixel_max]; CLIPW is defined outside this file
+    movq      [%5], %1                 ; write both halves back to where they were loaded from
+    movhps [%5+%6], %1
+%endmacro
+
+%macro STORE_DIFF16 5 ; %1,%2: dword inputs (both modified, result in %1), %3,%4: forwarded to CLIPW, %5: dst pointer
+    psrad       %1, 6      ; arithmetic >>6 on both dword inputs
+    psrad       %2, 6
+    packssdw    %1, %2     ; pack both registers into words in %1 with signed saturation
+    paddsw      %1, [%5]   ; add one full register of pixels read from dst
+    CLIPW       %1, %3, %4 ; presumably clamps each word to [%3, %4]; CLIPW is defined outside this file
+    mova      [%5], %1     ; write the result back to dst
+%endmacro
+
+; %1 = dst pointer (register, advanced by two rows), %2 = coefficient block, %3 = stride; uses m0-m5
+%macro IDCT4_ADD_10 3
+    mova  m0, [%2+ 0]                   ; load four 16-byte rows of coefficients
+    mova  m1, [%2+16]
+    mova  m2, [%2+32]
+    mova  m3, [%2+48]
+    IDCT4_1D d,0,1,2,3,4,5              ; first 1-D pass (macro defined outside this file)
+    TRANSPOSE4x4D 0,1,2,3,4
+    paddd m0, [pd_32]                   ; add the constant 32 before the second pass; the matching >>6 is in STORE_DIFFx2
+    IDCT4_1D d,0,1,2,3,4,5              ; second 1-D pass on the transposed data
+    pxor  m5, m5                        ; m5 = 0, handed to STORE_DIFFx2 as the lower clip operand
+    STORE_DIFFx2 m0, m1, m4, m5, %1, %3 ; add m0/m1 to the rows at dst and dst+stride
+    lea   %1, [%1+%3*2]                 ; dst += 2*stride (changes the caller's register)
+    STORE_DIFFx2 m2, m3, m4, m5, %1, %3 ; add m2/m3 to the next two rows
+%endmacro
+
+%macro IDCT_ADD_10 1 ; %1: function-name suffix (instantiated below with sse2 and avx)
+cglobal h264_idct_add_10_%1, 3,3
+    IDCT4_ADD_10 r0, r1, r2 ; r0 = dst, r1 = block, r2 = stride, in the order of the prototype comment above
+    RET
+%endmacro
+
+INIT_XMM
+IDCT_ADD_10 sse2
+%ifdef HAVE_AVX
+INIT_AVX
+IDCT_ADD_10 avx
+%endif
+
+;-----------------------------------------------------------------------------
+; h264_idct_add16(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
+;-----------------------------------------------------------------------------
+;;;;;;; NO FATE SAMPLES TRIGGER THIS
+%macro ADD4x4IDCT 1 ; %1: suffix of the helper label emitted below
+add4x4_idct_%1: ; helper reached with a bare "call"; reads r0 = dst base, r5 = block offset, r2 = coefficients, r3 = stride
+    add   r5, r0                        ; r5 = dst + offset
+    mova  m0, [r2+ 0]                   ; load four 16-byte rows of coefficients
+    mova  m1, [r2+16]
+    mova  m2, [r2+32]
+    mova  m3, [r2+48]
+    IDCT4_1D d,0,1,2,3,4,5              ; first 1-D pass (macro defined outside this file)
+    TRANSPOSE4x4D 0,1,2,3,4
+    paddd m0, [pd_32]                   ; add the constant 32 before the second pass; the matching >>6 is in STORE_DIFFx2
+    IDCT4_1D d,0,1,2,3,4,5              ; second 1-D pass on the transposed data
+    pxor  m5, m5                        ; m5 = 0, handed to STORE_DIFFx2 as the lower clip operand
+    STORE_DIFFx2 m0, m1, m4, m5, r5, r3 ; add m0/m1 to the rows at r5 and r5+stride
+    lea   r5, [r5+r3*2]                 ; move down two rows
+    STORE_DIFFx2 m2, m3, m4, m5, r5, r3 ; add m2/m3 to the next two rows
+    ret                                 ; plain ret, pairing with the bare "call" at the use sites
+%endmacro
+
+INIT_XMM
+ALIGN 16
+ADD4x4IDCT sse2
+%ifdef HAVE_AVX
+INIT_AVX
+ALIGN 16
+ADD4x4IDCT avx
+%endif
+
+%macro ADD16_OP 3 ; %1: function suffix, %2: block index, %3: byte offset into the array at r4 (nnzc in the prototype comment)
+    cmp          byte [r4+%3], 0     ; entry is zero -> skip this block
+    jz .skipblock%2
+    mov         r5d, dword [r1+%2*4] ; r5d = 32-bit entry %2 of the table at r1 (block_offset)
+    call add4x4_idct_%1
+.skipblock%2:
+%if %2<15
+    add          r2, 64              ; step r2 to the next block's coefficients (64 bytes per block); omitted after block 15
+%endif
+%endmacro
+
+%macro IDCT_ADD16_10 1 ; %1: function-name suffix; emits one ADD16_OP per block, 0 through 15
+cglobal h264_idct_add16_10_%1, 5,6
+    ADD16_OP %1, 0, 4+1*8 ; third argument: index into r4; the 16 values equal the first 16 entries of scan8_mem
+    ADD16_OP %1, 1, 5+1*8
+    ADD16_OP %1, 2, 4+2*8
+    ADD16_OP %1, 3, 5+2*8
+    ADD16_OP %1, 4, 6+1*8
+    ADD16_OP %1, 5, 7+1*8
+    ADD16_OP %1, 6, 6+2*8
+    ADD16_OP %1, 7, 7+2*8
+    ADD16_OP %1, 8, 4+3*8
+    ADD16_OP %1, 9, 5+3*8
+    ADD16_OP %1, 10, 4+4*8
+    ADD16_OP %1, 11, 5+4*8
+    ADD16_OP %1, 12, 6+3*8
+    ADD16_OP %1, 13, 7+3*8
+    ADD16_OP %1, 14, 6+4*8
+    ADD16_OP %1, 15, 7+4*8
+    RET
+%endmacro
+
+INIT_XMM
+IDCT_ADD16_10 sse2
+%ifdef HAVE_AVX
+INIT_AVX
+IDCT_ADD16_10 avx
+%endif
+
+;-----------------------------------------------------------------------------
+; void h264_idct_dc_add(pixel *dst, dctcoef *block, int stride)
+;-----------------------------------------------------------------------------
+%macro IDCT_DC_ADD_OP_10 3 ; %1: dst, %2: stride, %3: offset of the fourth row (callers pass 3*stride); reads m0 (words to add) and m6 (upper clip operand), uses m1-m5
+    pxor      m5, m5          ; m5 = 0, the lower clip operand
+%if avx_enabled
+    paddw     m1, m0, [%1+0   ] ; three-operand form: load a row and add m0 in one instruction
+    paddw     m2, m0, [%1+%2  ]
+    paddw     m3, m0, [%1+%2*2]
+    paddw     m4, m0, [%1+%3  ]
+%else
+    mova      m1, [%1+0   ]   ; load the four rows first ...
+    mova      m2, [%1+%2  ]
+    mova      m3, [%1+%2*2]
+    mova      m4, [%1+%3  ]
+    paddw     m1, m0          ; ... then add m0 to each
+    paddw     m2, m0
+    paddw     m3, m0
+    paddw     m4, m0
+%endif
+    CLIPW     m1, m5, m6      ; presumably clamps each word to [0, m6]; CLIPW is defined outside this file
+    CLIPW     m2, m5, m6
+    CLIPW     m3, m5, m6
+    CLIPW     m4, m5, m6
+    mova [%1+0   ], m1        ; write the four rows back
+    mova [%1+%2  ], m2
+    mova [%1+%2*2], m3
+    mova [%1+%3  ], m4
+%endmacro
+
+INIT_MMX
+cglobal h264_idct_dc_add_10_mmx2,3,3
+    movd      m0, dword [r1]      ; load the first 32-bit coefficient of the block
+    paddd     m0, [pd_32]         ; (x + 32) >> 6
+    psrad     m0, 6
+    lea       r1, [r2*3]          ; r1 = 3*stride; the block pointer is not used again
+    pshufw    m0, m0, 0           ; copy word 0 into all four words
+    mova      m6, [pw_pixel_max]  ; upper clip operand, (1 << 10)-1 in every word
+    IDCT_DC_ADD_OP_10 r0, r2, r1  ; add m0 to four rows at dst and clip
+    RET
+
+;-----------------------------------------------------------------------------
+; void h264_idct8_dc_add(pixel *dst, dctcoef *block, int stride)
+;-----------------------------------------------------------------------------
+%macro IDCT8_DC_ADD 1 ; %1: function-name suffix (instantiated below with sse2 and avx)
+cglobal h264_idct8_dc_add_10_%1,3,3,7
+    mov      r1d, dword [r1]     ; r1d = first 32-bit coefficient of the block
+    add       r1, 32             ; (x + 32) >> 6
+    sar       r1, 6
+    movd      m0, r1d
+    lea       r1, [r2*3]         ; r1 = 3*stride
+    SPLATW    m0, m0, 0          ; presumably broadcasts word 0 across m0; SPLATW is defined outside this file
+    mova      m6, [pw_pixel_max] ; upper clip operand
+    IDCT_DC_ADD_OP_10 r0, r2, r1 ; first four rows
+    lea       r0, [r0+r2*4]      ; dst += 4*stride
+    IDCT_DC_ADD_OP_10 r0, r2, r1 ; next four rows
+    RET
+%endmacro
+
+INIT_XMM
+IDCT8_DC_ADD sse2
+%ifdef HAVE_AVX
+INIT_AVX
+IDCT8_DC_ADD avx
+%endif
+
+;-----------------------------------------------------------------------------
+; h264_idct_add16intra(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
+;-----------------------------------------------------------------------------
+%macro AC 2 ; %1: function suffix, %2: index of the first block of a pair
+.ac%2: ; out-of-line target of "jnz .ac%2" in ADD16_OP_INTRA
+    mov  r5d, dword [r1+(%2+0)*4] ; offset entry of the first block
+    call add4x4_idct_%1
+    mov  r5d, dword [r1+(%2+1)*4] ; offset entry of the second block
+    add  r2, 64                   ; r2 -> coefficients of the second block
+    call add4x4_idct_%1
+    add  r2, 64                   ; r2 -> past the pair
+    jmp .skipadd%2                ; rejoin the straight-line code in ADD16_OP_INTRA
+%endmacro
+
+%macro ADD16_OP_INTRA 3 ; %1: function suffix, %2: index of the first block of a pair, %3: offset of that block's byte in the array at r4
+    cmp         word [r4+%3], 0    ; 16-bit compare: covers the bytes at %3 and %3+1, i.e. both blocks of the pair
+    jnz .ac%2                      ; nonzero -> per-block 4x4 path (label emitted by the AC macro)
+    mov         r6d, dword [r2+ 0] ; otherwise look at the first coefficient of each of the two blocks
+    or          r6d, dword [r2+64]
+    jz .skipblock%2                ; both zero -> nothing to add
+    mov  r5d, dword [r1+(%2+0)*4]  ; offset entry of the first block
+    call idct_dc_add_%1
+.skipblock%2:
+%if %2<15
+    add          r2, 128           ; advance past both 64-byte blocks
+%endif
+.skipadd%2:                        ; the AC path jumps here after advancing r2 itself
+%endmacro
+
+%macro IDCT_ADD16INTRA_10 1 ; %1: function-name suffix (instantiated below with sse2 and avx)
+idct_dc_add_%1: ; helper reached with a bare "call"; reads r0 = dst base, r5 = block offset, r2 = coefficients, r3 = stride; uses r6
+    add       r5, r0              ; r5 = dst + offset
+    movq      m0, [r2+ 0]         ; low half  <- first 8 bytes of the block at r2
+    movhps    m0, [r2+64]         ; high half <- first 8 bytes of the block 64 bytes later
+    paddd     m0, [pd_32]         ; (x + 32) >> 6 per dword
+    psrad     m0, 6
+    pshufhw   m0, m0, 0           ; copy the first word of the high half across the high half
+    pshuflw   m0, m0, 0           ; copy the first word of the low half across the low half
+    lea       r6, [r3*3]          ; r6 = 3*stride
+    mova      m6, [pw_pixel_max]  ; upper clip operand
+    IDCT_DC_ADD_OP_10 r5, r3, r6  ; add to four rows and clip
+    ret
+
+cglobal h264_idct_add16intra_10_%1,5,7,8
+    ADD16_OP_INTRA %1, 0, 4+1*8   ; blocks are handled in pairs: (0,1), (2,3), ... (14,15)
+    ADD16_OP_INTRA %1, 2, 4+2*8
+    ADD16_OP_INTRA %1, 4, 6+1*8
+    ADD16_OP_INTRA %1, 6, 6+2*8
+    ADD16_OP_INTRA %1, 8, 4+3*8
+    ADD16_OP_INTRA %1, 10, 4+4*8
+    ADD16_OP_INTRA %1, 12, 6+3*8
+    ADD16_OP_INTRA %1, 14, 6+4*8
+    RET                           ; the .acN bodies emitted below sit after this return and are entered only by jumps
+%assign i 14
+%rep 8
+    AC %1, i
+%assign i i-2
+%endrep
+%endmacro
+
+INIT_XMM
+IDCT_ADD16INTRA_10 sse2
+%ifdef HAVE_AVX
+INIT_AVX
+IDCT_ADD16INTRA_10 avx
+%endif
+
+;-----------------------------------------------------------------------------
+; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
+;-----------------------------------------------------------------------------
+%macro IDCT_ADD8 1
+cglobal h264_idct_add8_10_%1,5,7
+    mov          r5, 16
+    add          r2, 1024
+%ifdef PIC
+    lea         r11, [scan8_mem]
+%endif
+%ifdef ARCH_X86_64
+    mov         r10, r0
+%endif
+.nextblock:
+    movzx        r6, byte [scan8+r5]
+    movzx        r6, byte [r4+r6]
+    or          r6d, dword [r2]
+    test         r6, r6
+    jz .skipblock
+%ifdef ARCH_X86_64
+    mov         r0d, dword [r1+r5*4]
+    add          r0, [r10]
+%else
+    mov          r0, r0m
+    mov          r0, [r0]
+    add          r0, dword [r1+r5*4]
+%endif
+    IDCT4_ADD_10 r0, r2, r3
+.skipblock:
+    inc          r5
+    add          r2, 64
+    test         r5, 3
+    jnz .nextblock
+%ifdef ARCH_X86_64
+    add         r10, gprsize
+%else
+    add        r0mp, gprsize
+%endif
+    test         r5, 4
+    jnz .nextblock
+    REP_RET
+%endmacro ; IDCT_ADD8
+
+INIT_XMM
+IDCT_ADD8 sse2
+%ifdef HAVE_AVX
+INIT_AVX
+IDCT_ADD8 avx
+%endif
+
+;-----------------------------------------------------------------------------
+; void h264_idct8_add(pixel *dst, dctcoef *block, int stride)
+;-----------------------------------------------------------------------------
+%macro IDCT8_1D 2
+    SWAP         0, 1
+    psrad        m4, m5, 1
+    psrad        m1, m0, 1
+    paddd        m4, m5
+    paddd        m1, m0
+    paddd        m4, m7
+    paddd        m1, m5
+    psubd        m4, m0
+    paddd        m1, m3
+
+    psubd        m0, m3
+    psubd        m5, m3
+    paddd        m0, m7
+    psubd        m5, m7
+    psrad        m3, 1
+    psrad        m7, 1
+    psubd        m0, m3
+    psubd        m5, m7
+
+    SWAP         1, 7
+    psrad        m1, m7, 2
+    psrad        m3, m4, 2
+    paddd        m3, m0
+    psrad        m0, 2
+    paddd        m1, m5
+    psrad        m5, 2
+    psubd        m0, m4
+    psubd        m7, m5
+
+    SWAP         5, 6
+    psrad        m4, m2, 1
+    psrad        m6, m5, 1
+    psubd        m4, m5
+    paddd        m6, m2
+
+    mova         m2, %1
+    mova         m5, %2
+    SUMSUB_BA    d, 5, 2
+    SUMSUB_BA    d, 6, 5
+    SUMSUB_BA    d, 4, 2
+    SUMSUB_BA    d, 7, 6
+    SUMSUB_BA    d, 0, 4
+    SUMSUB_BA    d, 3, 2
+    SUMSUB_BA    d, 1, 5
+    SWAP         7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
+%endmacro
+
+%macro IDCT8_1D_FULL 1
+    mova         m7, [%1+112*2]
+    mova         m6, [%1+ 96*2]
+    mova         m5, [%1+ 80*2]
+    mova         m3, [%1+ 48*2]
+    mova         m2, [%1+ 32*2]
+    mova         m1, [%1+ 16*2]
+    IDCT8_1D   [%1], [%1+ 64*2]
+%endmacro
+
+; %1=dctcoef *block, %2=dctcoef *dstblock (32-bit coefficients)
+%macro IDCT8_ADD_SSE_START 2
+    IDCT8_1D_FULL %1
+%ifdef ARCH_X86_64
+    TRANSPOSE4x4D  0,1,2,3,8
+    mova    [%2    ], m0
+    TRANSPOSE4x4D  4,5,6,7,8
+    mova    [%2+8*2], m4
+%else
+    mova         [%1], m7
+    TRANSPOSE4x4D   0,1,2,3,7
+    mova           m7, [%1]
+    mova    [%2     ], m0
+    mova    [%2+16*2], m1
+    mova    [%2+32*2], m2
+    mova    [%2+48*2], m3
+    TRANSPOSE4x4D   4,5,6,7,3
+    mova    [%2+ 8*2], m4
+    mova    [%2+24*2], m5
+    mova    [%2+40*2], m6
+    mova    [%2+56*2], m7
+%endif
+%endmacro
+
+; %1=pixel *dst, %2=dctcoef *block, %3=int stride
+%macro IDCT8_ADD_SSE_END 3
+    IDCT8_1D_FULL %2
+    mova  [%2     ], m6
+    mova  [%2+16*2], m7
+
+    pxor         m7, m7
+    STORE_DIFFx2 m0, m1, m6, m7, %1, %3
+    lea          %1, [%1+%3*2]
+    STORE_DIFFx2 m2, m3, m6, m7, %1, %3
+    mova         m0, [%2     ]
+    mova         m1, [%2+16*2]
+    lea          %1, [%1+%3*2]
+    STORE_DIFFx2 m4, m5, m6, m7, %1, %3
+    lea          %1, [%1+%3*2]
+    STORE_DIFFx2 m0, m1, m6, m7, %1, %3
+%endmacro
+
+%macro IDCT8_ADD 1
+cglobal h264_idct8_add_10_%1, 3,4,16
+%ifndef UNIX64
+    %assign pad 16-gprsize-(stack_offset&15)
+    sub  rsp, pad
+    call h264_idct8_add1_10_%1
+    add  rsp, pad
+    RET
+%endif
+
+ALIGN 16
+; TODO: does not need to use stack
+h264_idct8_add1_10_%1:
+%assign pad 256+16-gprsize
+    sub          rsp, pad
+    add   dword [r1], 32
+
+%ifdef ARCH_X86_64
+    IDCT8_ADD_SSE_START r1, rsp
+    SWAP 1,  9
+    SWAP 2, 10
+    SWAP 3, 11
+    SWAP 5, 13
+    SWAP 6, 14
+    SWAP 7, 15
+    IDCT8_ADD_SSE_START r1+16, rsp+128
+    PERMUTE 1,9, 2,10, 3,11, 5,1, 6,2, 7,3, 9,13, 10,14, 11,15, 13,5, 14,6, 15,7
+    IDCT8_1D [rsp], [rsp+128]
+    SWAP 0,  8
+    SWAP 1,  9
+    SWAP 2, 10
+    SWAP 3, 11
+    SWAP 4, 12
+    SWAP 5, 13
+    SWAP 6, 14
+    SWAP 7, 15
+    IDCT8_1D [rsp+16], [rsp+144]
+    psrad         m8, 6
+    psrad         m0, 6
+    packssdw      m8, m0
+    paddsw        m8, [r0]
+    pxor          m0, m0
+    CLIPW         m8, m0, [pw_pixel_max]
+    mova        [r0], m8
+    mova          m8, [pw_pixel_max]
+    STORE_DIFF16  m9, m1, m0, m8, r0+r2
+    lea           r0, [r0+r2*2]
+    STORE_DIFF16 m10, m2, m0, m8, r0
+    STORE_DIFF16 m11, m3, m0, m8, r0+r2
+    lea           r0, [r0+r2*2]
+    STORE_DIFF16 m12, m4, m0, m8, r0
+    STORE_DIFF16 m13, m5, m0, m8, r0+r2
+    lea           r0, [r0+r2*2]
+    STORE_DIFF16 m14, m6, m0, m8, r0
+    STORE_DIFF16 m15, m7, m0, m8, r0+r2
+%else
+    IDCT8_ADD_SSE_START r1,    rsp
+    IDCT8_ADD_SSE_START r1+16, rsp+128
+    lea           r3, [r0+8]
+    IDCT8_ADD_SSE_END r0, rsp,    r2
+    IDCT8_ADD_SSE_END r3, rsp+16, r2
+%endif ; ARCH_X86_64
+
+    add          rsp, pad
+    ret
+%endmacro
+
+INIT_XMM
+IDCT8_ADD sse2
+%ifdef HAVE_AVX
+INIT_AVX
+IDCT8_ADD avx
+%endif
+
+;-----------------------------------------------------------------------------
+; h264_idct8_add4(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
+;-----------------------------------------------------------------------------
+;;;;;;; NO FATE SAMPLES TRIGGER THIS
+%macro IDCT8_ADD4_OP 3
+    cmp       byte [r4+%3], 0
+    jz .skipblock%2
+    mov      r0d, dword [r6+%2*4]
+    add       r0, r5
+    call h264_idct8_add1_10_%1
+.skipblock%2:
+%if %2<12
+    add       r1, 256
+%endif
+%endmacro
+
+%macro IDCT8_ADD4 1
+cglobal h264_idct8_add4_10_%1, 0,7,16
+    %assign pad 16-gprsize-(stack_offset&15)
+    SUB      rsp, pad
+    mov       r5, r0mp
+    mov       r6, r1mp
+    mov       r1, r2mp
+    mov      r2d, r3m
+    movifnidn r4, r4mp
+    IDCT8_ADD4_OP %1,  0, 4+1*8
+    IDCT8_ADD4_OP %1,  4, 6+1*8
+    IDCT8_ADD4_OP %1,  8, 4+3*8
+    IDCT8_ADD4_OP %1, 12, 6+3*8
+    ADD       rsp, pad
+    RET
+%endmacro ; IDCT8_ADD4
+
+INIT_XMM
+IDCT8_ADD4 sse2
+%ifdef HAVE_AVX
+INIT_AVX
+IDCT8_ADD4 avx
+%endif
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 1c07d14cd0..d60fbd5e79 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -27,6 +27,43 @@ DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;
 
 /***********************************/
 /* IDCT */
+#define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \
+void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride);
+
+IDCT_ADD_FUNC(, 10, sse2)
+IDCT_ADD_FUNC(_dc, 10, mmx2)
+IDCT_ADD_FUNC(8_dc, 10, sse2)
+IDCT_ADD_FUNC(8, 10, sse2)
+#if HAVE_AVX
+IDCT_ADD_FUNC(, 10, avx)
+IDCT_ADD_FUNC(8_dc, 10, avx)
+IDCT_ADD_FUNC(8, 10, avx)
+#endif
+
+
+#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \
+void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
+                              (uint8_t *dst, const int *block_offset, \
+                              DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+
+IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
+IDCT_ADD_REP_FUNC(8, 4, 10, avx)
+IDCT_ADD_REP_FUNC(, 16, 10, sse2)
+IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
+#if HAVE_AVX
+IDCT_ADD_REP_FUNC(, 16, 10, avx)
+IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
+#endif
+
+
+#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \
+void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
+                              (uint8_t **dst, const int *block_offset, \
+                              DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
+#if HAVE_AVX
+IDCT_ADD_REP_FUNC2(, 8, 10, avx)
+#endif
 
 void ff_h264_idct_add_mmx     (uint8_t *dst, int16_t *block, int stride);
 void ff_h264_idct8_add_mmx    (uint8_t *dst, int16_t *block, int stride);
@@ -418,7 +455,17 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
             c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
             c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
 #endif
+            c->h264_idct_dc_add= ff_h264_idct_dc_add_10_mmx2;
             if (mm_flags&AV_CPU_FLAG_SSE2) {
+                c->h264_idct_add       = ff_h264_idct_add_10_sse2;
+                c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_sse2;
+                c->h264_idct8_add      = ff_h264_idct8_add_10_sse2;
+
+                c->h264_idct_add16     = ff_h264_idct_add16_10_sse2;
+                c->h264_idct8_add4     = ff_h264_idct8_add4_10_sse2;
+                c->h264_idct_add8      = ff_h264_idct_add8_10_sse2;
+                c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2;
+
                 c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2;
                 c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2;
 #if HAVE_ALIGNED_STACK
@@ -428,7 +475,18 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
 #endif
             }
+#if HAVE_AVX
             if (mm_flags&AV_CPU_FLAG_AVX) {
+                c->h264_idct_dc_add    =
+                c->h264_idct_add       = ff_h264_idct_add_10_avx;
+                c->h264_idct8_add      = ff_h264_idct8_add_10_avx;
+                c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_avx;
+
+                c->h264_idct_add16     = ff_h264_idct_add16_10_avx;
+                c->h264_idct8_add4     = ff_h264_idct8_add4_10_avx;
+                c->h264_idct_add8      = ff_h264_idct_add8_10_avx;
+                c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx;
+
                 c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_avx;
                 c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_avx;
 #if HAVE_ALIGNED_STACK
@@ -438,6 +496,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
 #endif
             }
+#endif /* HAVE_AVX */
         }
     }
 #endif

From 348493db60de19d1997fd2861e130720218b9fcf Mon Sep 17 00:00:00 2001
From: Daniel Kang <daniel.d.kang@gmail.com>
Date: Tue, 24 May 2011 15:15:08 -0400
Subject: [PATCH 483/830] Update 8-bit H.264 IDCT function names to reflect
 bit-depth.

Signed-off-by: Ronald S. Bultje <rbultje@google.com>
---
 libavcodec/h264dsp.h         |  1 -
 libavcodec/x86/h264_idct.asm | 38 ++++++++--------
 libavcodec/x86/h264dsp_mmx.c | 88 +++++++++++++++---------------------
 3 files changed, 56 insertions(+), 71 deletions(-)

diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 87a1dd9722..864c118bb5 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -66,7 +66,6 @@ typedef struct H264DSPContext{
     void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
     void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
 
-    void (*h264_dct)(DCTELEM block[4][4]);
     void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
     void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
     void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index ae70a3049b..f90f41c4bc 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -73,7 +73,7 @@ SECTION .text
 
 INIT_MMX
 ; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct_add_mmx, 3, 3, 0
+cglobal h264_idct_add_8_mmx, 3, 3, 0
     IDCT4_ADD    r0, r1, r2
     RET
 
@@ -125,7 +125,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0
     SUMSUB_BA    w, 0, 4
     SUMSUB_BA    w, 3, 2
     SUMSUB_BA    w, 1, 5
-    SWAP          7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
+    SWAP         7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
 %endmacro
 
 %macro IDCT8_1D_FULL 1
@@ -177,7 +177,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0
 
 INIT_MMX
 ; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct8_add_mmx, 3, 4, 0
+cglobal h264_idct8_add_8_mmx, 3, 4, 0
     %assign pad 128+4-(stack_offset&7)
     SUB         rsp, pad
 
@@ -237,7 +237,7 @@ cglobal h264_idct8_add_mmx, 3, 4, 0
 
 INIT_XMM
 ; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct8_add_sse2, 3, 4, 10
+cglobal h264_idct8_add_8_sse2, 3, 4, 10
     IDCT8_ADD_SSE r0, r1, r2, r3
     RET
 
@@ -261,7 +261,7 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
     packuswb     m1, m1
 %endmacro
 
-%macro DC_ADD_MMX2_OP 3-4
+%macro DC_ADD_MMX2_OP 4
     %1           m2, [%2     ]
     %1           m3, [%2+%3  ]
     %1           m4, [%2+%3*2]
@@ -282,13 +282,13 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
 
 INIT_MMX
 ; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct_dc_add_mmx2, 3, 3, 0
+cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0
     DC_ADD_MMX2_INIT r1, r2
     DC_ADD_MMX2_OP movh, r0, r2, r1
     RET
 
 ; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
-cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
+cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0
     DC_ADD_MMX2_INIT r1, r2
     DC_ADD_MMX2_OP mova, r0, r2, r1
     lea          r0, [r0+r2*4]
@@ -297,7 +297,7 @@ cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
 
 ; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
 ;             DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16_mmx, 5, 7, 0
+cglobal h264_idct_add16_8_mmx, 5, 7, 0
     xor          r5, r5
 %ifdef PIC
     lea         r11, [scan8_mem]
@@ -319,7 +319,7 @@ cglobal h264_idct_add16_mmx, 5, 7, 0
 
 ; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
 ;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct8_add4_mmx, 5, 7, 0
+cglobal h264_idct8_add4_8_mmx, 5, 7, 0
     %assign pad 128+4-(stack_offset&7)
     SUB         rsp, pad
 
@@ -351,7 +351,7 @@ cglobal h264_idct8_add4_mmx, 5, 7, 0
 
 ; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
 ;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16_mmx2, 5, 7, 0
+cglobal h264_idct_add16_8_mmx2, 5, 7, 0
     xor          r5, r5
 %ifdef PIC
     lea         r11, [scan8_mem]
@@ -398,7 +398,7 @@ cglobal h264_idct_add16_mmx2, 5, 7, 0
 
 ; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
 ;                             DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16intra_mmx, 5, 7, 0
+cglobal h264_idct_add16intra_8_mmx, 5, 7, 0
     xor          r5, r5
 %ifdef PIC
     lea         r11, [scan8_mem]
@@ -421,7 +421,7 @@ cglobal h264_idct_add16intra_mmx, 5, 7, 0
 
 ; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
 ;                              DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16intra_mmx2, 5, 7, 0
+cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
     xor          r5, r5
 %ifdef PIC
     lea         r11, [scan8_mem]
@@ -466,7 +466,7 @@ cglobal h264_idct_add16intra_mmx2, 5, 7, 0
 
 ; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
 ;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct8_add4_mmx2, 5, 7, 0
+cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
     %assign pad 128+4-(stack_offset&7)
     SUB         rsp, pad
 
@@ -529,7 +529,7 @@ cglobal h264_idct8_add4_mmx2, 5, 7, 0
 INIT_XMM
 ; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
 ;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct8_add4_sse2, 5, 7, 10
+cglobal h264_idct8_add4_8_sse2, 5, 7, 10
     xor          r5, r5
 %ifdef PIC
     lea         r11, [scan8_mem]
@@ -607,7 +607,7 @@ h264_idct_add8_mmx_plane:
 
 ; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
 ;                       DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add8_mmx, 5, 7, 0
+cglobal h264_idct_add8_8_mmx, 5, 7, 0
     mov          r5, 16
     add          r2, 512
 %ifdef PIC
@@ -668,7 +668,7 @@ h264_idct_add8_mmx2_plane
 
 ; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
 ;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add8_mmx2, 5, 7, 0
+cglobal h264_idct_add8_8_mmx2, 5, 7, 0
     mov          r5, 16
     add          r2, 512
 %ifdef ARCH_X86_64
@@ -744,7 +744,7 @@ x264_add8x4_idct_sse2:
 
 ; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
 ;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16_sse2, 5, 5, 8
+cglobal h264_idct_add16_8_sse2, 5, 5, 8
 %ifdef ARCH_X86_64
     mov        r10, r0
 %endif
@@ -791,7 +791,7 @@ cglobal h264_idct_add16_sse2, 5, 5, 8
 
 ; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
 ;                              DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16intra_sse2, 5, 7, 8
+cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
 %ifdef ARCH_X86_64
     mov        r10, r0
 %endif
@@ -840,7 +840,7 @@ cglobal h264_idct_add16intra_sse2, 5, 7, 8
 
 ; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
 ;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add8_sse2, 5, 7, 8
+cglobal h264_idct_add8_8_sse2, 5, 7, 8
     add          r2, 512
 %ifdef ARCH_X86_64
     mov         r10, r0
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index d60fbd5e79..1a31e41a43 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -30,9 +30,14 @@ DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;
 #define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \
 void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride);
 
+IDCT_ADD_FUNC(, 8, mmx)
 IDCT_ADD_FUNC(, 10, sse2)
+IDCT_ADD_FUNC(_dc, 8, mmx2)
 IDCT_ADD_FUNC(_dc, 10, mmx2)
+IDCT_ADD_FUNC(8_dc, 8, mmx2)
 IDCT_ADD_FUNC(8_dc, 10, sse2)
+IDCT_ADD_FUNC(8, 8, mmx)
+IDCT_ADD_FUNC(8, 8, sse2)
 IDCT_ADD_FUNC(8, 10, sse2)
 #if HAVE_AVX
 IDCT_ADD_FUNC(, 10, avx)
@@ -46,9 +51,18 @@ void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
                               (uint8_t *dst, const int *block_offset, \
                               DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
 
+IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
+IDCT_ADD_REP_FUNC(8, 4, 8, mmx2)
+IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
 IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
 IDCT_ADD_REP_FUNC(8, 4, 10, avx)
+IDCT_ADD_REP_FUNC(, 16, 8, mmx)
+IDCT_ADD_REP_FUNC(, 16, 8, mmx2)
+IDCT_ADD_REP_FUNC(, 16, 8, sse2)
 IDCT_ADD_REP_FUNC(, 16, 10, sse2)
+IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
+IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
+IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
 IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
 #if HAVE_AVX
 IDCT_ADD_REP_FUNC(, 16, 10, avx)
@@ -60,42 +74,14 @@ IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
 void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
                               (uint8_t **dst, const int *block_offset, \
                               DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
+IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
+IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
 IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
 #if HAVE_AVX
 IDCT_ADD_REP_FUNC2(, 8, 10, avx)
 #endif
 
-void ff_h264_idct_add_mmx     (uint8_t *dst, int16_t *block, int stride);
-void ff_h264_idct8_add_mmx    (uint8_t *dst, int16_t *block, int stride);
-void ff_h264_idct8_add_sse2   (uint8_t *dst, int16_t *block, int stride);
-void ff_h264_idct_dc_add_mmx2 (uint8_t *dst, int16_t *block, int stride);
-void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride);
-
-void ff_h264_idct_add16_mmx      (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct8_add4_mmx      (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16_mmx2     (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16intra_mmx (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct8_add4_mmx2     (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct8_add4_sse2     (uint8_t *dst, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add8_mmx       (uint8_t **dest, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add8_mmx2      (uint8_t **dest, const int *block_offset,
-                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-
-void ff_h264_idct_add16_sse2     (uint8_t *dst, const int *block_offset, DCTELEM *block,
-                                  int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block,
-                                  int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add8_sse2      (uint8_t **dest, const int *block_offset, DCTELEM *block,
-                                  int stride, const uint8_t nnzc[6*8]);
 void ff_h264_luma_dc_dequant_idct_mmx (DCTELEM *output, DCTELEM *input, int qmul);
 void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);
 
@@ -350,24 +336,24 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
     }
 #if HAVE_YASM
     if (mm_flags & AV_CPU_FLAG_MMX) {
-        c->h264_idct_dc_add=
-        c->h264_idct_add= ff_h264_idct_add_mmx;
-        c->h264_idct8_dc_add=
-        c->h264_idct8_add= ff_h264_idct8_add_mmx;
+        c->h264_idct_dc_add         =
+        c->h264_idct_add            = ff_h264_idct_add_8_mmx;
+        c->h264_idct8_dc_add        =
+        c->h264_idct8_add           = ff_h264_idct8_add_8_mmx;
 
-        c->h264_idct_add16     = ff_h264_idct_add16_mmx;
-        c->h264_idct8_add4     = ff_h264_idct8_add4_mmx;
-        c->h264_idct_add8      = ff_h264_idct_add8_mmx;
-        c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
+        c->h264_idct_add16          = ff_h264_idct_add16_8_mmx;
+        c->h264_idct8_add4          = ff_h264_idct8_add4_8_mmx;
+        c->h264_idct_add8           = ff_h264_idct_add8_8_mmx;
+        c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_mmx;
         c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;
 
         if (mm_flags & AV_CPU_FLAG_MMX2) {
-            c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
-            c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
-            c->h264_idct_add16     = ff_h264_idct_add16_mmx2;
-            c->h264_idct8_add4     = ff_h264_idct8_add4_mmx2;
-            c->h264_idct_add8      = ff_h264_idct_add8_mmx2;
-            c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
+            c->h264_idct_dc_add    = ff_h264_idct_dc_add_8_mmx2;
+            c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_8_mmx2;
+            c->h264_idct_add16     = ff_h264_idct_add16_8_mmx2;
+            c->h264_idct8_add4     = ff_h264_idct8_add4_8_mmx2;
+            c->h264_idct_add8      = ff_h264_idct_add8_8_mmx2;
+            c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2;
 
             c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
             c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
@@ -398,8 +384,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
             c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
 
             if (mm_flags&AV_CPU_FLAG_SSE2) {
-                c->h264_idct8_add = ff_h264_idct8_add_sse2;
-                c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
+                c->h264_idct8_add           = ff_h264_idct8_add_8_sse2;
+
+                c->h264_idct_add16          = ff_h264_idct_add16_8_sse2;
+                c->h264_idct8_add4          = ff_h264_idct8_add4_8_sse2;
+                c->h264_idct_add8           = ff_h264_idct_add8_8_sse2;
+                c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_sse2;
                 c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;
 
                 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2;
@@ -420,10 +410,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                 c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
                 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
 #endif
-
-                c->h264_idct_add16 = ff_h264_idct_add16_sse2;
-                c->h264_idct_add8  = ff_h264_idct_add8_sse2;
-                c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
             }
             if (mm_flags&AV_CPU_FLAG_SSSE3) {
                 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;

From 201549d1a95e5ea81a97257368668cda0afcb2f8 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 1 Jun 2011 04:15:13 +0200
Subject: [PATCH 484/830] swscale: More accurate rounding in
 YSCALE_YUV_2_PACKEDX_FULL_C()

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index e7e81d35cb..6bf7fcfd9d 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -557,9 +557,9 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
 #define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
     for (i=0; i<dstW; i++) {\
         int j;\
-        int Y = 0;\
-        int U = -128<<19;\
-        int V = -128<<19;\
+        int Y = 1<<9;\
+        int U = (1<<9)-(128<<19);\
+        int V = (1<<9)-(128<<19);\
         int av_unused A;\
         int R,G,B;\
         \

From f3aa65af3a84c787b0fa8b8cf0881fc05668a24c Mon Sep 17 00:00:00 2001
From: Daniel Kang <daniel.d.kang@gmail.com>
Date: Tue, 31 May 2011 23:10:51 -0400
Subject: [PATCH 485/830] h264/10bit: add HAVE_ALIGNED_STACK checks.

Fixes regression in 836f47d34b49e8ba9883e738a42f154130421caa in ICC-10.x,
since ICC<=11.0 doesn't align stack upon function calls.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/x86/h264dsp_mmx.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 1a31e41a43..3fccd081d5 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -445,12 +445,14 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
             if (mm_flags&AV_CPU_FLAG_SSE2) {
                 c->h264_idct_add       = ff_h264_idct_add_10_sse2;
                 c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_sse2;
-                c->h264_idct8_add      = ff_h264_idct8_add_10_sse2;
 
                 c->h264_idct_add16     = ff_h264_idct_add16_10_sse2;
-                c->h264_idct8_add4     = ff_h264_idct8_add4_10_sse2;
                 c->h264_idct_add8      = ff_h264_idct_add8_10_sse2;
                 c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2;
+#if HAVE_ALIGNED_STACK
+                c->h264_idct8_add      = ff_h264_idct8_add_10_sse2;
+                c->h264_idct8_add4     = ff_h264_idct8_add4_10_sse2;
+#endif
 
                 c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2;
                 c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2;
@@ -465,13 +467,15 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
             if (mm_flags&AV_CPU_FLAG_AVX) {
                 c->h264_idct_dc_add    =
                 c->h264_idct_add       = ff_h264_idct_add_10_avx;
-                c->h264_idct8_add      = ff_h264_idct8_add_10_avx;
                 c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_avx;
 
                 c->h264_idct_add16     = ff_h264_idct_add16_10_avx;
-                c->h264_idct8_add4     = ff_h264_idct8_add4_10_avx;
                 c->h264_idct_add8      = ff_h264_idct_add8_10_avx;
                 c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx;
+#if HAVE_ALIGNED_STACK
+                c->h264_idct8_add      = ff_h264_idct8_add_10_avx;
+                c->h264_idct8_add4     = ff_h264_idct8_add4_10_avx;
+#endif
 
                 c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_avx;
                 c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_avx;

From ca858ab77d7bb49ae99485721bbbe3a580670904 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 30 May 2011 09:18:42 +0200
Subject: [PATCH 486/830] doc/examples: give meaningful names to the example
 files

Rename:
api-example.c    -> encoding-example.c
output-example.c -> muxing-example.c
---
 doc/examples/Makefile                               | 2 +-
 doc/examples/{api-example.c => encoding-example.c}  | 0
 doc/examples/{output-example.c => muxing-example.c} | 0
 3 files changed, 1 insertion(+), 1 deletion(-)
 rename doc/examples/{api-example.c => encoding-example.c} (100%)
 rename doc/examples/{output-example.c => muxing-example.c} (100%)

diff --git a/doc/examples/Makefile b/doc/examples/Makefile
index facbd7e178..c32d524da4 100644
--- a/doc/examples/Makefile
+++ b/doc/examples/Makefile
@@ -3,7 +3,7 @@ FFMPEG_LIBS=libavdevice libavformat libavfilter libavcodec libswscale libavutil
 CFLAGS+=$(shell pkg-config  --cflags $(FFMPEG_LIBS))
 LDFLAGS+=$(shell pkg-config --libs $(FFMPEG_LIBS))
 
-EXAMPLES=api-example output-example
+EXAMPLES=encoding-example muxing-example
 
 OBJS=$(addsuffix .o,$(EXAMPLES))
 
diff --git a/doc/examples/api-example.c b/doc/examples/encoding-example.c
similarity index 100%
rename from doc/examples/api-example.c
rename to doc/examples/encoding-example.c
diff --git a/doc/examples/output-example.c b/doc/examples/muxing-example.c
similarity index 100%
rename from doc/examples/output-example.c
rename to doc/examples/muxing-example.c

From ac41f3b036246e64d1724d945bea01d982bd1c91 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 30 May 2011 16:52:35 +0200
Subject: [PATCH 487/830] ffmpeg: handle copy of packets for AVFMT_RAWPICTURE
 output formats

Store AVPicture in AVPacket as required by AVFMT_RAWPICTURE formats.

Fix trac issue #251.
---
 ffmpeg.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/ffmpeg.c b/ffmpeg.c
index 220feb298d..e962f1562a 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1731,6 +1731,7 @@ static int output_packet(AVInputStream *ist, int ist_index,
                         }
                     } else {
                         AVFrame avframe; //FIXME/XXX remove this
+                        AVPicture pict;
                         AVPacket opkt;
                         int64_t ost_tb_start_time= av_rescale_q(start_time, AV_TIME_BASE_Q, ost->st->time_base);
 
@@ -1784,6 +1785,13 @@ static int output_packet(AVInputStream *ist, int ist_index,
                             opkt.size = data_size;
                         }
 
+                        if (os->oformat->flags & AVFMT_RAWPICTURE) {
+                            /* store AVPicture in AVPacket, as expected by the output format */
+                            avpicture_fill(&pict, opkt.data, ost->st->codec->pix_fmt, ost->st->codec->width, ost->st->codec->height);
+                            opkt.data = (uint8_t *)&pict;
+                            opkt.size = sizeof(AVPicture);
+                            opkt.flags |= AV_PKT_FLAG_KEY;
+                        }
                         write_frame(os, &opkt, ost->st->codec, ost->bitstream_filters);
                         ost->st->codec->frame_number++;
                         ost->frame_number++;

From 1f95fb58137951941d8d74bd47b1635b6d2399ec Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 30 May 2011 17:09:12 +0200
Subject: [PATCH 488/830] yuv4mpeg: complain and exit if a non-rawvideo stream
 is selected

The yuv4mpeg muxer will crash otherwise.
---
 libavformat/yuv4mpeg.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavformat/yuv4mpeg.c b/libavformat/yuv4mpeg.c
index 445ec3701b..90b222d1d4 100644
--- a/libavformat/yuv4mpeg.c
+++ b/libavformat/yuv4mpeg.c
@@ -154,6 +154,12 @@ static int yuv4_write_header(AVFormatContext *s)
     if (s->nb_streams != 1)
         return AVERROR(EIO);
 
+    if (s->streams[0]->codec->codec_id != CODEC_ID_RAWVIDEO) {
+        av_log(s, AV_LOG_ERROR,
+               "A non-rawvideo stream was selected, but yuv4mpeg only handles rawvideo streams\n");
+        return AVERROR(EINVAL);
+    }
+
     if (s->streams[0]->codec->pix_fmt == PIX_FMT_YUV411P) {
         av_log(s, AV_LOG_ERROR, "Warning: generating rarely used 4:1:1 YUV stream, some mjpegtools might not work.\n");
     }

From adf94155989c765c4cac6e3ef5d3526555ad2274 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Wed, 1 Jun 2011 13:07:57 +0200
Subject: [PATCH 489/830] lavf: tag dump_format() as @deprecated

This makes the generated Doxygen doc link to the replacement
av_dump_format() function.
---
 libavformat/avformat.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 41830adcac..1703bccec9 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1459,6 +1459,9 @@ int av_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
 int av_write_trailer(AVFormatContext *s);
 
 #if FF_API_DUMP_FORMAT
+/**
+ * @deprecated Deprecated in favor of av_dump_format().
+ */
 attribute_deprecated void dump_format(AVFormatContext *ic,
                                       int index,
                                       const char *url,

From eb5a3ab7b0ddaa9e7ce1b5070ba44bda9ce41ce9 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Wed, 1 Jun 2011 13:30:35 +0200
Subject: [PATCH 490/830] swscale: fix compilation of bfin due to missing
 pixdesc.h header

This is required after sws_format_name() was replaced by
av_get_pix_fmt_name(), which is declared in libavutil/pixdesc.h.
---
 libswscale/bfin/yuv2rgb_bfin.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c
index a2be1df0ed..7a7dc7f0e6 100644
--- a/libswscale/bfin/yuv2rgb_bfin.c
+++ b/libswscale/bfin/yuv2rgb_bfin.c
@@ -28,6 +28,7 @@
 #include <assert.h>
 #include "config.h"
 #include <unistd.h>
+#include "libavutil/pixdesc.h"
 #include "libswscale/rgb2rgb.h"
 #include "libswscale/swscale.h"
 #include "libswscale/swscale_internal.h"

From bde2c1c7fe175c5c857a8d9a8ae05f31ace0b94f Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 1 Jun 2011 12:17:30 +0200
Subject: [PATCH 491/830] mpegaudiodec: Fix av_dlog() invocation.

Some parameters passed to av_dlog() can be either float or int, depending on
the mode in which the file is compiled.  Cast those parameters to float and use
appropriate conversion specifiers.
---
 libavcodec/mpegaudiodec.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 6910d1fa4c..033d76e049 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -406,8 +406,9 @@ static av_cold int decode_init(AVCodecContext * avctx)
                 k = i & 1;
                 is_table_lsf[j][k ^ 1][i] = FIXR(f);
                 is_table_lsf[j][k][i] = FIXR(1.0);
-                av_dlog(avctx, "is_table_lsf %d %d: %x %x\n",
-                        i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
+                av_dlog(avctx, "is_table_lsf %d %d: %f %f\n",
+                        i, j, (float) is_table_lsf[j][0][i],
+                        (float) is_table_lsf[j][1][i]);
             }
         }
 

From 8e112df409061034202b98fcc6ce2c1c670c0dda Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 29 May 2011 14:27:03 +0100
Subject: [PATCH 492/830] ARM: ac3dsp: optimised update_bap_counts()

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/Makefile          |  1 +
 libavcodec/arm/ac3dsp_arm.S      | 35 ++++++++++++++++++++++++++++++++
 libavcodec/arm/ac3dsp_init_arm.c |  4 +++-
 3 files changed, 39 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/arm/ac3dsp_arm.S

diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index a5a5dfab64..a5abfdd128 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -1,4 +1,5 @@
 OBJS-$(CONFIG_AC3DSP)                  += arm/ac3dsp_init_arm.o         \
+                                          arm/ac3dsp_arm.o
 
 OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \
 
diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S
new file mode 100644
index 0000000000..545714cff1
--- /dev/null
+++ b/libavcodec/arm/ac3dsp_arm.S
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+function ff_ac3_update_bap_counts_arm, export=1
+        push            {lr}
+        ldrb            lr,  [r1], #1
+1:
+        lsl             r3,  lr,  #1
+        ldrh            r12, [r0, r3]
+        subs            r2,  r2,  #1
+        ldrbgt          lr,  [r1], #1
+        add             r12, r12, #1
+        strh            r12, [r0, r3]
+        bgt             1b
+        pop             {pc}
+endfunc
diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c
index 4414dc8170..aed11f4bb8 100644
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@@ -35,10 +35,12 @@ void ff_ac3_bit_alloc_calc_bap_armv6(int16_t *mask, int16_t *psd,
                                      int snr_offset, int floor,
                                      const uint8_t *bap_tab, uint8_t *bap);
 
-int ff_ac3_compute_mantissa_size_arm(int cnt[5], uint8_t *bap, int nb_coefs);
+void ff_ac3_update_bap_counts_arm(uint16_t mant_cnt[16], uint8_t *bap, int len);
 
 av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
 {
+    c->update_bap_counts         = ff_ac3_update_bap_counts_arm;
+
     if (HAVE_ARMV6) {
         c->bit_alloc_calc_bap    = ff_ac3_bit_alloc_calc_bap_armv6;
     }

From bf19c871012644fe27c69531b5f733c472aa858b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Wed, 1 Jun 2011 20:26:54 +0200
Subject: [PATCH 493/830] Fix type of out[] variable, it should not be const.

Fixes compiler warning about incompatible types in sws_scale call.
---
 libavfilter/vf_scale.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c
index 9ff93bd411..e172a2e586 100644
--- a/libavfilter/vf_scale.c
+++ b/libavfilter/vf_scale.c
@@ -270,7 +270,8 @@ static int scale_slice(AVFilterLink *link, struct SwsContext *sws, int y, int h,
     ScaleContext *scale = link->dst->priv;
     AVFilterBufferRef *cur_pic = link->cur_buf;
     AVFilterBufferRef *out_buf = link->dst->outputs[0]->out_buf;
-    const uint8_t *in[4], *out[4];
+    const uint8_t *in[4];
+    uint8_t *out[4];
     int in_stride[4],out_stride[4];
     int i;
 

From 06a9da736554197601e4062298fcf45a5e8d49ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Wed, 1 Jun 2011 20:29:27 +0200
Subject: [PATCH 494/830] Simplify code and avoid compiler warning about
 incompatible types.

---
 libavformat/avio.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavformat/avio.c b/libavformat/avio.c
index 972c5334f3..b2926c0f3c 100644
--- a/libavformat/avio.c
+++ b/libavformat/avio.c
@@ -60,11 +60,11 @@ URLProtocol *av_protocol_next(URLProtocol *p)
 
 const char *avio_enum_protocols(void **opaque, int output)
 {
-    URLProtocol **p = opaque;
-    *p = *p ? (*p)->next : first_protocol;
-    if (!*p) return NULL;
-    if ((output && (*p)->url_write) || (!output && (*p)->url_read))
-        return (*p)->name;
+    URLProtocol *p = *opaque;
+    p = p ? p->next : first_protocol;
+    if (!p) return NULL;
+    if ((output && p->url_write) || (!output && p->url_read))
+        return p->name;
     return avio_enum_protocols(opaque, output);
 }
 

From 2a30df09fd64634ad1b70485cd665ad05116730c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Wed, 1 Jun 2011 20:52:43 +0200
Subject: [PATCH 495/830] Replace non-existent HAVE_SSE2 with HAVE_SSE.

Since this is only a compilation check (the actual function used is
selected at runtime) and HAVE_SSE indicates that we can also compile
SSE2 code, this is correct.
---
 libavfilter/libmpcodecs/vf_gradfun.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavfilter/libmpcodecs/vf_gradfun.c b/libavfilter/libmpcodecs/vf_gradfun.c
index 2732f55d9a..fd4236cc37 100644
--- a/libavfilter/libmpcodecs/vf_gradfun.c
+++ b/libavfilter/libmpcodecs/vf_gradfun.c
@@ -188,7 +188,7 @@ static void filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc,
 }
 #endif // HAVE_SSSE3
 
-#if HAVE_SSE2 && HAVE_6REGS
+#if HAVE_SSE && HAVE_6REGS
 #define BLURV(load)\
     intptr_t x = -2*width;\
     __asm__ volatile(\
@@ -231,7 +231,7 @@ static void blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
         BLURV("movdqa");
     }
 }
-#endif // HAVE_6REGS && HAVE_SSE2
+#endif // HAVE_6REGS && HAVE_SSE
 
 static void filter(struct vf_priv_s *ctx, uint8_t *dst, uint8_t *src,
                    int width, int height, int dstride, int sstride, int r)
@@ -385,7 +385,7 @@ static int vf_open(vf_instance_t *vf, char *args)
 
     vf->priv->blur_line = blur_line_c;
     vf->priv->filter_line = filter_line_c;
-#if HAVE_SSE2 && HAVE_6REGS
+#if HAVE_SSE && HAVE_6REGS
     if (gCpuCaps.hasSSE2)
         vf->priv->blur_line = blur_line_sse2;
 #endif

From 3379531c401b457c9f7437ee0db772da75fd1765 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Wed, 1 Jun 2011 21:24:16 +0200
Subject: [PATCH 496/830] Port recent changes to MPlayer libmpcodecs.

Also include an older fix for vf_smartblur which was essentially
broken due to reading the threshold value wrongly.
---
 libavfilter/libmpcodecs/vf_divtc.c     |  3 ++-
 libavfilter/libmpcodecs/vf_ilpack.c    | 13 ++++++++-----
 libavfilter/libmpcodecs/vf_pp7.c       |  4 ++--
 libavfilter/libmpcodecs/vf_smartblur.c |  4 ++--
 libavfilter/libmpcodecs/vf_unsharp.c   |  2 +-
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/libavfilter/libmpcodecs/vf_divtc.c b/libavfilter/libmpcodecs/vf_divtc.c
index 3ead47290d..4c171d1728 100644
--- a/libavfilter/libmpcodecs/vf_divtc.c
+++ b/libavfilter/libmpcodecs/vf_divtc.c
@@ -598,7 +598,8 @@ static void uninit(struct vf_instance *vf)
 static int vf_open(vf_instance_t *vf, char *args)
    {
    struct vf_priv_s *p;
-   char *filename="framediff.log", *ap, *q, *a;
+   const char *filename="framediff.log";
+   char *ap, *q, *a;
 
    if(args && !(args=av_strdup(args)))
       {
diff --git a/libavfilter/libmpcodecs/vf_ilpack.c b/libavfilter/libmpcodecs/vf_ilpack.c
index 77555a7b41..db4a849e1f 100644
--- a/libavfilter/libmpcodecs/vf_ilpack.c
+++ b/libavfilter/libmpcodecs/vf_ilpack.c
@@ -28,6 +28,7 @@
 #include "img_format.h"
 #include "mp_image.h"
 #include "vf.h"
+#include "libavutil/attributes.h"
 
 typedef void (pack_func_t)(unsigned char *dst, unsigned char *y,
     unsigned char *u, unsigned char *v, int w, int us, int vs);
@@ -38,7 +39,8 @@ struct vf_priv_s {
 };
 
 static void pack_nn_C(unsigned char *dst, unsigned char *y,
-    unsigned char *u, unsigned char *v, int w)
+    unsigned char *u, unsigned char *v, int w,
+    int av_unused us, int av_unused vs)
 {
     int j;
     for (j = w/2; j; j--) {
@@ -77,7 +79,8 @@ static void pack_li_1_C(unsigned char *dst, unsigned char *y,
 
 #if HAVE_MMX
 static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
-    unsigned char *u, unsigned char *v, int w)
+    unsigned char *u, unsigned char *v, int w,
+    int av_unused us, int av_unused vs)
 {
     __asm__ volatile (""
         ASMALIGN(4)
@@ -103,7 +106,7 @@ static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
         : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8)
         : "memory"
         );
-    pack_nn_C(dst, y, u, v, (w&7));
+    pack_nn_C(dst, y, u, v, (w&7), 0, 0);
 }
 
 #if HAVE_EBX_AVAILABLE
@@ -413,12 +416,12 @@ static int vf_open(vf_instance_t *vf, char *args)
     vf->priv->mode = 1;
     if (args) sscanf(args, "%d", &vf->priv->mode);
 
-    pack_nn = (pack_func_t *)pack_nn_C;
+    pack_nn = pack_nn_C;
     pack_li_0 = pack_li_0_C;
     pack_li_1 = pack_li_1_C;
 #if HAVE_MMX
     if(gCpuCaps.hasMMX) {
-        pack_nn = (pack_func_t *)pack_nn_MMX;
+        pack_nn = pack_nn_MMX;
 #if HAVE_EBX_AVAILABLE
         pack_li_0 = pack_li_0_MMX;
         pack_li_1 = pack_li_1_MMX;
diff --git a/libavfilter/libmpcodecs/vf_pp7.c b/libavfilter/libmpcodecs/vf_pp7.c
index f8b64b658a..c075d6619c 100644
--- a/libavfilter/libmpcodecs/vf_pp7.c
+++ b/libavfilter/libmpcodecs/vf_pp7.c
@@ -286,8 +286,8 @@ static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stri
     int x, y;
     const int stride= is_luma ? p->temp_stride : ((width+16+15)&(~15));
     uint8_t  *p_src= p->src + 8*stride;
-    DCTELEM *block= p->src;
-    DCTELEM *temp= p->src + 32;
+    DCTELEM *block= (DCTELEM *)p->src;
+    DCTELEM *temp= (DCTELEM *)(p->src + 32);
 
     if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
     for(y=0; y<height; y++){
diff --git a/libavfilter/libmpcodecs/vf_smartblur.c b/libavfilter/libmpcodecs/vf_smartblur.c
index 3e20880f04..8acdb73ffc 100644
--- a/libavfilter/libmpcodecs/vf_smartblur.c
+++ b/libavfilter/libmpcodecs/vf_smartblur.c
@@ -183,11 +183,11 @@ static inline void blur(uint8_t *dst, uint8_t *src, int w, int h, int dstStride,
 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){
     int cw= mpi->w >> mpi->chroma_x_shift;
     int ch= mpi->h >> mpi->chroma_y_shift;
-    FilterParam *f= &vf->priv;
+    int threshold = vf->priv->luma.threshold || vf->priv->chroma.threshold;
 
     mp_image_t *dmpi=vf_get_image(vf->next,mpi->imgfmt,
         MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE|
-        (f->threshold) ? MP_IMGFLAG_READABLE : 0,
+        (threshold ? MP_IMGFLAG_READABLE : 0),
         mpi->w,mpi->h);
 
     assert(mpi->flags&MP_IMGFLAG_PLANAR);
diff --git a/libavfilter/libmpcodecs/vf_unsharp.c b/libavfilter/libmpcodecs/vf_unsharp.c
index cd464321f4..db22f78e9d 100644
--- a/libavfilter/libmpcodecs/vf_unsharp.c
+++ b/libavfilter/libmpcodecs/vf_unsharp.c
@@ -132,7 +132,7 @@ static int config( struct vf_instance *vf,
 
     int z, stepsX, stepsY;
     FilterParam *fp;
-    char *effect;
+    const char *effect;
 
     // allocate buffers
 

From 3c194f390fe7e18a6e2e51eb4f29495eb230585e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Wed, 1 Jun 2011 22:15:36 +0200
Subject: [PATCH 497/830] Replace "vector const" by "const vector" otherwise
 gcc 4.6.0 fails.

Given that this compiles fine with the Apple compiler, that is probably
a gcc bug, but "const vector" is nicer anyway.
---
 libswscale/ppc/yuv2rgb_altivec.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 626d55f5f7..8aaa987e62 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -298,7 +298,7 @@ static int altivec_##name (SwsContext *c,                               \
     vector signed short R1,G1,B1;                                       \
     vector unsigned char R,G,B;                                         \
                                                                         \
-    vector const unsigned char *y1ivP, *y2ivP, *uivP, *vivP;            \
+    const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;            \
     vector unsigned char align_perm;                                    \
                                                                         \
     vector signed short                                                 \
@@ -335,10 +335,10 @@ static int altivec_##name (SwsContext *c,                               \
                                                                         \
         for (j=0;j<w/16;j++) {                                          \
                                                                         \
-            y1ivP = (vector const unsigned char *)y1i;                  \
-            y2ivP = (vector const unsigned char *)y2i;                  \
-            uivP  = (vector const unsigned char *)ui;                   \
-            vivP  = (vector const unsigned char *)vi;                   \
+            y1ivP = (const vector unsigned char *)y1i;                  \
+            y2ivP = (const vector unsigned char *)y2i;                  \
+            uivP  = (const vector unsigned char *)ui;                   \
+            vivP  = (const vector unsigned char *)vi;                   \
                                                                         \
             align_perm = vec_lvsl (0, y1i);                             \
             y0 = (vector unsigned char)                                 \

From 58fd70b04decdb7e5580c06b1be3bd573fabeeda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Wed, 1 Jun 2011 21:30:13 +0200
Subject: [PATCH 498/830] Port remove of get_sws_cpuflags from MPlayer's
 libmpcodecs.

---
 libavfilter/libmpcodecs/vf_sab.c       | 2 +-
 libavfilter/libmpcodecs/vf_scale.h     | 1 -
 libavfilter/libmpcodecs/vf_smartblur.c | 2 +-
 libavfilter/vf_mp.c                    | 4 ----
 4 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/libavfilter/libmpcodecs/vf_sab.c b/libavfilter/libmpcodecs/vf_sab.c
index 377c9e33a8..51e9d78158 100644
--- a/libavfilter/libmpcodecs/vf_sab.c
+++ b/libavfilter/libmpcodecs/vf_sab.c
@@ -102,7 +102,7 @@ static int allocStuff(FilterParam *f, int width, int height){
     swsF.lumH= swsF.lumV= vec;
     swsF.chrH= swsF.chrV= NULL;
     f->preFilterContext= sws_getContext(
-        width, height, PIX_FMT_GRAY8, width, height, PIX_FMT_GRAY8, get_sws_cpuflags()|SWS_POINT, &swsF, NULL, NULL);
+        width, height, PIX_FMT_GRAY8, width, height, PIX_FMT_GRAY8, SWS_POINT, &swsF, NULL, NULL);
 
     sws_freeVec(vec);
     vec = sws_getGaussianVec(f->strength, 5.0);
diff --git a/libavfilter/libmpcodecs/vf_scale.h b/libavfilter/libmpcodecs/vf_scale.h
index 91ed103c30..4de3b48ec3 100644
--- a/libavfilter/libmpcodecs/vf_scale.h
+++ b/libavfilter/libmpcodecs/vf_scale.h
@@ -29,7 +29,6 @@ extern float sws_lum_sharpen;
 
 extern int sws_flags;
 
-int get_sws_cpuflags(void);
 struct SwsContext *sws_getContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat);
 
 #endif /* MPLAYER_VF_SCALE_H */
diff --git a/libavfilter/libmpcodecs/vf_smartblur.c b/libavfilter/libmpcodecs/vf_smartblur.c
index 8acdb73ffc..5bfcb2806e 100644
--- a/libavfilter/libmpcodecs/vf_smartblur.c
+++ b/libavfilter/libmpcodecs/vf_smartblur.c
@@ -87,7 +87,7 @@ static int allocStuff(FilterParam *f, int width, int height){
     swsF.lumH= swsF.lumV= vec;
     swsF.chrH= swsF.chrV= NULL;
     f->filterContext= sws_getContext(
-        width, height, PIX_FMT_GRAY8, width, height, PIX_FMT_GRAY8, SWS_BICUBIC | get_sws_cpuflags(), &swsF, NULL, NULL);
+        width, height, PIX_FMT_GRAY8, width, height, PIX_FMT_GRAY8, SWS_BICUBIC, &swsF, NULL, NULL);
 
     sws_freeVec(vec);
 
diff --git a/libavfilter/vf_mp.c b/libavfilter/vf_mp.c
index 7dd8a64df8..01a8f064a2 100644
--- a/libavfilter/vf_mp.c
+++ b/libavfilter/vf_mp.c
@@ -287,10 +287,6 @@ zrmjpeg
 CpuCaps gCpuCaps; //FIXME initialize this so optims work
 
 
-int get_sws_cpuflags(void){
-    return 0;
-}
-
 static void sws_getFlagsAndFilterFromCmdLine(int *flags, SwsFilter **srcFilterParam, SwsFilter **dstFilterParam)
 {
         static int firstTime=1;

From b443447536116f2843097f26a693478c66dcbe02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= <ubitux@gmail.com>
Date: Sun, 29 May 2011 21:02:20 +0200
Subject: [PATCH 499/830] Fix various uninitialized variable warnings

---
 ffplay.c                  | 2 +-
 libavformat/iff.c         | 2 ++
 libavformat/matroskadec.c | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/ffplay.c b/ffplay.c
index ffe1c0f12d..74d1f10945 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -1740,7 +1740,7 @@ static int video_thread(void *arg)
 {
     VideoState *is = arg;
     AVFrame *frame= avcodec_alloc_frame();
-    int64_t pts_int, pos;
+    int64_t pts_int = AV_NOPTS_VALUE, pos = -1;
     double pts;
     int ret;
 
diff --git a/libavformat/iff.c b/libavformat/iff.c
index 2dd1ef7553..a0a43742dd 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -326,6 +326,8 @@ static int iff_read_packet(AVFormatContext *s,
         buf = pkt->data;
         bytestream_put_be16(&buf, 2);
         ret = avio_read(pb, buf, iff->body_size);
+    } else {
+        av_abort();
     }
 
     if(iff->sent_bytes == 0)
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 569b95fbb2..031e89ac8d 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1037,7 +1037,8 @@ static void matroska_fix_ass_packet(MatroskaDemuxContext *matroska,
     char *line, *layer, *ptr = pkt->data, *end = ptr+pkt->size;
     for (; *ptr!=',' && ptr<end-1; ptr++);
     if (*ptr == ',')
-        layer = ++ptr;
+        ptr++;
+    layer = ptr;
     for (; *ptr!=',' && ptr<end-1; ptr++);
     if (*ptr == ',') {
         int64_t end_pts = pkt->pts + display_duration;

From c96f3750c22ef1576a46140f3101e3585041f41f Mon Sep 17 00:00:00 2001
From: Piotr Kaczuba <p.kaczuba@attika.ath.cx>
Date: Wed, 1 Jun 2011 18:47:37 +0200
Subject: [PATCH 500/830] postprocess: Remove test for impossible condition
 (was: Re: postprocess.c: replace check for p==NULL with *p==0)

---
 libpostproc/postprocess.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index bfb96e1a8f..589c2cead6 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -819,8 +819,7 @@ pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
                 int plen;
                 int spaceLeft;
 
-                if(p==NULL) p= temp, *p=0;      //last filter
-                else p--, *p=',';               //not last filter
+                p--, *p=',';
 
                 plen= strlen(p);
                 spaceLeft= p - temp + plen;

From d013c6da80003cb4b577867d1f091e47a0fe3776 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Wed, 1 Jun 2011 13:56:12 -0700
Subject: [PATCH 501/830] doc: cosmetics: libx264 typos

---
 doc/encoders.texi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 7913ffe7c3..a54f726397 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -538,8 +538,8 @@ For more information about libvpx see:
 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 format supported through
 libx264.
 
-Requires the presence of the libx64 headers and library during
-configuration. You need to explicitely configure the build with
+Requires the presence of the libx264 headers and library during
+configuration. You need to explicitly configure the build with
 @code{--enable-libx264}.
 
 @subsection Options

From e6635a9a19639a71c9c1f71c3b4547a0a6301d29 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 1 Jun 2011 21:56:55 -0700
Subject: [PATCH 502/830] h264: remove CONFIG_GPL from x86 intra prediction
 code.

The authors permitted relicensing to LGPL a long time ago (Holger,
Loren and Jason).
---
 libavcodec/x86/h264_intrapred.asm    | 6 ------
 libavcodec/x86/h264_intrapred_init.c | 8 --------
 2 files changed, 14 deletions(-)

diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 0b2f8d39df..cbf3cf7a5c 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -836,7 +836,6 @@ PRED8x8_H ssse3
 ;-----------------------------------------------------------------------------
 ; void pred8x8_top_dc_mmxext(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
-%ifdef CONFIG_GPL
 cglobal pred8x8_top_dc_mmxext, 2,5
     sub         r0, r1
     movq       mm0, [r0]
@@ -927,7 +926,6 @@ cglobal pred8x8_dc_mmxext, 2,5
     movq [r4+r1*1], m1
     movq [r4+r1*2], m1
     RET
-%endif
 
 ;-----------------------------------------------------------------------------
 ; void pred8x8_dc_rv40(uint8_t *src, int stride)
@@ -1083,7 +1081,6 @@ cglobal pred8x8_tm_vp8_ssse3, 2,3,6
 ;-----------------------------------------------------------------------------
 ; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
 ;-----------------------------------------------------------------------------
-%ifdef CONFIG_GPL
 %macro PRED8x8L_TOP_DC 1
 cglobal pred8x8l_top_dc_%1, 4,4
     sub          r0, r3
@@ -2476,7 +2473,6 @@ PRED8x8L_HORIZONTAL_DOWN sse2
 INIT_MMX
 %define PALIGNR PALIGNR_SSSE3
 PRED8x8L_HORIZONTAL_DOWN ssse3
-%endif
 
 ;-----------------------------------------------------------------------------
 ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
@@ -2608,7 +2604,6 @@ cglobal pred4x4_vertical_vp8_mmxext, 3,3
 ;-----------------------------------------------------------------------------
 ; void pred4x4_down_left_mmxext(uint8_t *src, const uint8_t *topright, int stride)
 ;-----------------------------------------------------------------------------
-%ifdef CONFIG_GPL
 INIT_MMX
 cglobal pred4x4_down_left_mmxext, 3,3
     sub       r0, r2
@@ -2786,4 +2781,3 @@ cglobal pred4x4_down_right_mmxext, 3,3
     psrlq     m0, 8
     movh      [r0+r2*1], m0
     RET
-%endif
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 9eb752581b..bd57030660 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -129,7 +129,6 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
         h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
         h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_mmxext;
         h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
-#if CONFIG_GPL
         h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
         h->pred8x8l [DC_PRED     ] = ff_pred8x8l_dc_mmxext;
         h->pred8x8l [HOR_PRED    ] = ff_pred8x8l_horizontal_mmxext;
@@ -142,9 +141,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
         h->pred4x4  [DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_mmxext;
         h->pred4x4  [VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_mmxext;
         h->pred4x4  [HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_mmxext;
-#endif
         h->pred4x4  [DC_PRED     ] = ff_pred4x4_dc_mmxext;
-#if CONFIG_GPL
         if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
             h->pred4x4  [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
         if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264)
@@ -156,7 +153,6 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
             h->pred8x8  [TOP_DC_PRED8x8      ] = ff_pred8x8_top_dc_mmxext;
             h->pred8x8  [DC_PRED8x8          ] = ff_pred8x8_dc_mmxext;
         }
-#endif
         if (codec_id == CODEC_ID_VP8) {
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
             h->pred8x8  [DC_PRED8x8   ] = ff_pred8x8_dc_rv40_mmxext;
@@ -181,13 +177,11 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
 
     if (mm_flags & AV_CPU_FLAG_SSE2) {
         h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_sse2;
-#if CONFIG_GPL
         h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
         h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_sse2;
         h->pred8x8l [VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_sse2;
         h->pred8x8l [VERT_LEFT_PRED      ] = ff_pred8x8l_vertical_left_sse2;
         h->pred8x8l [HOR_DOWN_PRED       ] = ff_pred8x8l_horizontal_down_sse2;
-#endif
         if (codec_id == CODEC_ID_VP8) {
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
             h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
@@ -207,7 +201,6 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
         h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3;
         h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_ssse3;
         h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
-#if CONFIG_GPL
         h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3;
         h->pred8x8l [DC_PRED     ] = ff_pred8x8l_dc_ssse3;
         h->pred8x8l [HOR_PRED    ] = ff_pred8x8l_horizontal_ssse3;
@@ -218,7 +211,6 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
         h->pred8x8l [VERT_LEFT_PRED      ] = ff_pred8x8l_vertical_left_ssse3;
         h->pred8x8l [HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_ssse3;
         h->pred8x8l [HOR_DOWN_PRED       ] = ff_pred8x8l_horizontal_down_ssse3;
-#endif
         if (codec_id == CODEC_ID_VP8) {
             h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
             h->pred4x4  [TM_VP8_PRED  ] = ff_pred4x4_tm_vp8_ssse3;

From cc9947ffbe8a847a49d092c9253910cfc53279ad Mon Sep 17 00:00:00 2001
From: Nathan Caldwell <saintdev@gmail.com>
Date: Tue, 31 May 2011 23:38:09 -0600
Subject: [PATCH 503/830] aacenc: Add stereo_mode option.

ms_off is the default, until Mid/Side is no longer buggy.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/aacenc.c | 32 +++++++++++++++++++++++++++++---
 libavcodec/aacenc.h |  6 ++++++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 92ff4b0e62..e8942a13f9 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -30,6 +30,7 @@
  * add temporal noise shaping
  ***********************************/
 
+#include "libavutil/opt.h"
 #include "avcodec.h"
 #include "put_bits.h"
 #include "dsputil.h"
@@ -489,7 +490,7 @@ static int aac_encode_frame(AVCodecContext *avctx,
     AACEncContext *s = avctx->priv_data;
     int16_t *samples = s->samples, *samples2, *la;
     ChannelElement *cpe;
-    int i, ch, w, chans, tag, start_ch;
+    int i, ch, w, g, chans, tag, start_ch;
     const uint8_t *chan_map = aac_chan_configs[avctx->channels-1];
     int chan_el_counter[4];
     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
@@ -587,8 +588,16 @@ static int aac_encode_frame(AVCodecContext *avctx,
                 }
             }
             s->cur_channel = start_ch;
-            if (cpe->common_window && s->coder->search_for_ms)
-                s->coder->search_for_ms(s, cpe, s->lambda);
+            if (s->options.stereo_mode && cpe->common_window) {
+                if (s->options.stereo_mode > 0) {
+                    IndividualChannelStream *ics = &cpe->ch[0].ics;
+                    for (w = 0; w < ics->num_windows; w += ics->group_len[w])
+                        for (g = 0;  g < ics->num_swb; g++)
+                            cpe->ms_mask[w*16+g] = 1;
+                } else if (s->coder->search_for_ms) {
+                    s->coder->search_for_ms(s, cpe, s->lambda);
+                }
+            }
             adjust_frame_information(s, cpe, chans);
             if (chans == 2) {
                 put_bits(&s->pb, 1, cpe->common_window);
@@ -645,6 +654,22 @@ static av_cold int aac_encode_end(AVCodecContext *avctx)
     return 0;
 }
 
+#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
+static const AVOption aacenc_options[] = {
+    {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), FF_OPT_TYPE_INT, {.dbl = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
+        {"auto",     "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
+        {"ms_off",   "Disable Mid/Side coding", 0, FF_OPT_TYPE_CONST, {.dbl =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
+        {"ms_force", "Force Mid/Side for the whole frame if possible", 0, FF_OPT_TYPE_CONST, {.dbl =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
+    {NULL}
+};
+
+static const AVClass aacenc_class = {
+    "AAC encoder",
+    av_default_item_name,
+    aacenc_options,
+    LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_aac_encoder = {
     "aac",
     AVMEDIA_TYPE_AUDIO,
@@ -656,4 +681,5 @@ AVCodec ff_aac_encoder = {
     .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL,
     .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
+    .priv_class = &aacenc_class,
 };
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 7e08db24c0..067a9b04f3 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -30,6 +30,10 @@
 
 #include "psymodel.h"
 
+typedef struct AACEncOptions {
+    int stereo_mode;
+} AACEncOptions;
+
 struct AACEncContext;
 
 typedef struct AACCoefficientsEncoder {
@@ -48,6 +52,8 @@ extern AACCoefficientsEncoder ff_aac_coders[];
  * AAC encoder context
  */
 typedef struct AACEncContext {
+    AVClass *av_class;
+    AACEncOptions options;                       ///< encoding options
     PutBitContext pb;
     FFTContext mdct1024;                         ///< long (1024 samples) frame transform context
     FFTContext mdct128;                          ///< short (128 samples) frame transform context

From 994c3550ffe032385833c21876fb121f59516079 Mon Sep 17 00:00:00 2001
From: Loren Merritt <lorenm@u.washington.edu>
Date: Wed, 1 Jun 2011 21:53:15 -0400
Subject: [PATCH 504/830] 2x faster h264_idct_add8_10.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/x86/h264_idct_10bit.asm | 55 ++++++++++++------------------
 1 file changed, 21 insertions(+), 34 deletions(-)

diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 3d0004e09e..64089001e5 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -249,16 +249,17 @@ IDCT8_DC_ADD avx
     jmp .skipadd%2
 %endmacro
 
+%assign last_block 16
 %macro ADD16_OP_INTRA 3
     cmp         word [r4+%3], 0
     jnz .ac%2
-    mov         r6d, dword [r2+ 0]
-    or          r6d, dword [r2+64]
+    mov         r5d, dword [r2+ 0]
+    or          r5d, dword [r2+64]
     jz .skipblock%2
-    mov  r5d, dword [r1+(%2+0)*4]
+    mov         r5d, dword [r1+(%2+0)*4]
     call idct_dc_add_%1
 .skipblock%2:
-%if %2<15
+%if %2<last_block-2
     add          r2, 128
 %endif
 .skipadd%2:
@@ -302,47 +303,33 @@ INIT_AVX
 IDCT_ADD16INTRA_10 avx
 %endif
 
+%assign last_block 24
 ;-----------------------------------------------------------------------------
 ; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
 %macro IDCT_ADD8 1
 cglobal h264_idct_add8_10_%1,5,7
-    mov          r5, 16
-    add          r2, 1024
-%ifdef PIC
-    lea         r11, [scan8_mem]
-%endif
 %ifdef ARCH_X86_64
-    mov         r10, r0
+    mov r10, r0
 %endif
-.nextblock:
-    movzx        r6, byte [scan8+r5]
-    movzx        r6, byte [r4+r6]
-    or          r6d, dword [r2]
-    test         r6, r6
-    jz .skipblock
+    add r2, 1024
+    mov r0, [r0]
+    ADD16_OP_INTRA %1, 16, 1+1*8
+    ADD16_OP_INTRA %1, 18, 1+2*8
 %ifdef ARCH_X86_64
-    mov         r0d, dword [r1+r5*4]
-    add          r0, [r10]
+    mov r0, [r10+gprsize]
 %else
-    mov          r0, r0m
-    mov          r0, [r0]
-    add          r0, dword [r1+r5*4]
+    mov r0, r0m
+    mov r0, [r0+gprsize]
 %endif
-    IDCT4_ADD_10 r0, r2, r3
-.skipblock:
-    inc          r5
-    add          r2, 64
-    test         r5, 3
-    jnz .nextblock
-%ifdef ARCH_X86_64
-    add         r10, gprsize
-%else
-    add        r0mp, gprsize
-%endif
-    test         r5, 4
-    jnz .nextblock
+    ADD16_OP_INTRA %1, 20, 1+4*8
+    ADD16_OP_INTRA %1, 22, 1+5*8
     REP_RET
+    AC %1, 16
+    AC %1, 18
+    AC %1, 20
+    AC %1, 22
+
 %endmacro ; IDCT_ADD8
 
 INIT_XMM

From 53be7b23e9d7074d1aeee77407b008411d034e9e Mon Sep 17 00:00:00 2001
From: Loren Merritt <lorenm@u.washington.edu>
Date: Wed, 1 Jun 2011 01:01:01 -0400
Subject: [PATCH 505/830] Cosmetic changes to h264_idct_10bit.asm.

Removes redundant dword tags and cleans up whitespace.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/x86/h264_idct_10bit.asm | 131 +++++++++++++++--------------
 1 file changed, 67 insertions(+), 64 deletions(-)

diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 64089001e5..3f7cf4cefc 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -133,7 +133,7 @@ ADD4x4IDCT avx
 %macro ADD16_OP 3
     cmp          byte [r4+%3], 0
     jz .skipblock%2
-    mov         r5d, dword [r1+%2*4]
+    mov         r5d, [r1+%2*4]
     call add4x4_idct_%1
 .skipblock%2:
 %if %2<15
@@ -159,7 +159,7 @@ cglobal h264_idct_add16_10_%1, 5,6
     ADD16_OP %1, 13, 7+3*8
     ADD16_OP %1, 14, 6+4*8
     ADD16_OP %1, 15, 7+4*8
-    RET
+    REP_RET
 %endmacro
 
 INIT_XMM
@@ -201,7 +201,7 @@ IDCT_ADD16_10 avx
 
 INIT_MMX
 cglobal h264_idct_dc_add_10_mmx2,3,3
-    movd      m0, dword [r1]
+    movd      m0, [r1]
     paddd     m0, [pd_32]
     psrad     m0, 6
     lea       r1, [r2*3]
@@ -215,7 +215,7 @@ cglobal h264_idct_dc_add_10_mmx2,3,3
 ;-----------------------------------------------------------------------------
 %macro IDCT8_DC_ADD 1
 cglobal h264_idct8_dc_add_10_%1,3,3,7
-    mov      r1d, dword [r1]
+    mov      r1d, [r1]
     add       r1, 32
     sar       r1, 6
     movd      m0, r1d
@@ -240,9 +240,9 @@ IDCT8_DC_ADD avx
 ;-----------------------------------------------------------------------------
 %macro AC 2
 .ac%2
-    mov  r5d, dword [r1+(%2+0)*4]
+    mov  r5d, [r1+(%2+0)*4]
     call add4x4_idct_%1
-    mov  r5d, dword [r1+(%2+1)*4]
+    mov  r5d, [r1+(%2+1)*4]
     add  r2, 64
     call add4x4_idct_%1
     add  r2, 64
@@ -251,16 +251,16 @@ IDCT8_DC_ADD avx
 
 %assign last_block 16
 %macro ADD16_OP_INTRA 3
-    cmp         word [r4+%3], 0
+    cmp      word [r4+%3], 0
     jnz .ac%2
-    mov         r5d, dword [r2+ 0]
-    or          r5d, dword [r2+64]
+    mov      r5d, [r2+ 0]
+    or       r5d, [r2+64]
     jz .skipblock%2
-    mov         r5d, dword [r1+(%2+0)*4]
+    mov      r5d, [r1+(%2+0)*4]
     call idct_dc_add_%1
 .skipblock%2:
 %if %2<last_block-2
-    add          r2, 128
+    add       r2, 128
 %endif
 .skipadd%2:
 %endmacro
@@ -288,12 +288,15 @@ cglobal h264_idct_add16intra_10_%1,5,7,8
     ADD16_OP_INTRA %1, 10, 4+4*8
     ADD16_OP_INTRA %1, 12, 6+3*8
     ADD16_OP_INTRA %1, 14, 6+4*8
-    RET
-%assign i 14
-%rep 8
-    AC %1, i
-%assign i i-2
-%endrep
+    REP_RET
+    AC %1, 8
+    AC %1, 10
+    AC %1, 12
+    AC %1, 14
+    AC %1, 0
+    AC %1, 2
+    AC %1, 4
+    AC %1, 6
 %endmacro
 
 INIT_XMM
@@ -312,15 +315,15 @@ cglobal h264_idct_add8_10_%1,5,7
 %ifdef ARCH_X86_64
     mov r10, r0
 %endif
-    add r2, 1024
-    mov r0, [r0]
+    add      r2, 1024
+    mov      r0, [r0]
     ADD16_OP_INTRA %1, 16, 1+1*8
     ADD16_OP_INTRA %1, 18, 1+2*8
 %ifdef ARCH_X86_64
-    mov r0, [r10+gprsize]
+    mov      r0, [r10+gprsize]
 %else
-    mov r0, r0m
-    mov r0, [r0+gprsize]
+    mov      r0, r0m
+    mov      r0, [r0+gprsize]
 %endif
     ADD16_OP_INTRA %1, 20, 1+4*8
     ADD16_OP_INTRA %1, 22, 1+5*8
@@ -343,51 +346,51 @@ IDCT_ADD8 avx
 ; void h264_idct8_add(pixel *dst, dctcoef *block, int stride)
 ;-----------------------------------------------------------------------------
 %macro IDCT8_1D 2
-    SWAP         0, 1
-    psrad        m4, m5, 1
-    psrad        m1, m0, 1
-    paddd        m4, m5
-    paddd        m1, m0
-    paddd        m4, m7
-    paddd        m1, m5
-    psubd        m4, m0
-    paddd        m1, m3
+    SWAP      0, 1
+    psrad     m4, m5, 1
+    psrad     m1, m0, 1
+    paddd     m4, m5
+    paddd     m1, m0
+    paddd     m4, m7
+    paddd     m1, m5
+    psubd     m4, m0
+    paddd     m1, m3
 
-    psubd        m0, m3
-    psubd        m5, m3
-    paddd        m0, m7
-    psubd        m5, m7
-    psrad        m3, 1
-    psrad        m7, 1
-    psubd        m0, m3
-    psubd        m5, m7
+    psubd     m0, m3
+    psubd     m5, m3
+    paddd     m0, m7
+    psubd     m5, m7
+    psrad     m3, 1
+    psrad     m7, 1
+    psubd     m0, m3
+    psubd     m5, m7
 
-    SWAP         1, 7
-    psrad        m1, m7, 2
-    psrad        m3, m4, 2
-    paddd        m3, m0
-    psrad        m0, 2
-    paddd        m1, m5
-    psrad        m5, 2
-    psubd        m0, m4
-    psubd        m7, m5
+    SWAP      1, 7
+    psrad     m1, m7, 2
+    psrad     m3, m4, 2
+    paddd     m3, m0
+    psrad     m0, 2
+    paddd     m1, m5
+    psrad     m5, 2
+    psubd     m0, m4
+    psubd     m7, m5
 
-    SWAP         5, 6
-    psrad        m4, m2, 1
-    psrad        m6, m5, 1
-    psubd        m4, m5
-    paddd        m6, m2
+    SWAP      5, 6
+    psrad     m4, m2, 1
+    psrad     m6, m5, 1
+    psubd     m4, m5
+    paddd     m6, m2
 
-    mova         m2, %1
-    mova         m5, %2
-    SUMSUB_BA    d, 5, 2
-    SUMSUB_BA    d, 6, 5
-    SUMSUB_BA    d, 4, 2
-    SUMSUB_BA    d, 7, 6
-    SUMSUB_BA    d, 0, 4
-    SUMSUB_BA    d, 3, 2
-    SUMSUB_BA    d, 1, 5
-    SWAP         7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
+    mova      m2, %1
+    mova      m5, %2
+    SUMSUB_BA d, 5, 2
+    SUMSUB_BA d, 6, 5
+    SUMSUB_BA d, 4, 2
+    SUMSUB_BA d, 7, 6
+    SUMSUB_BA d, 0, 4
+    SUMSUB_BA d, 3, 2
+    SUMSUB_BA d, 1, 5
+    SWAP      7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
 %endmacro
 
 %macro IDCT8_1D_FULL 1
@@ -523,7 +526,7 @@ IDCT8_ADD avx
 %macro IDCT8_ADD4_OP 3
     cmp       byte [r4+%3], 0
     jz .skipblock%2
-    mov      r0d, dword [r6+%2*4]
+    mov      r0d, [r6+%2*4]
     add       r0, r5
     call h264_idct8_add1_10_%1
 .skipblock%2:

From 77b32b73ed31f9aaa6c1e476c9a041399a35be9d Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 2 Jun 2011 16:26:55 +0200
Subject: [PATCH 506/830] lavfi: apply misc style fixes

Adopt K&R style for overall consistency/readability.
---
 libavfilter/avfilter.c | 14 +++++++-------
 libavfilter/defaults.c | 22 +++++++++++-----------
 libavfilter/internal.h |  2 +-
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index 72e0a87f8e..b7ad6f0503 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -77,8 +77,8 @@ static void store_in_pool(AVFilterBufferRef *ref)
 
     av_assert0(ref->buf->data[0]);
 
-    if(pool->count == POOL_SIZE){
-        AVFilterBufferRef *ref1= pool->pic[0];
+    if (pool->count == POOL_SIZE) {
+        AVFilterBufferRef *ref1 = pool->pic[0];
         av_freep(&ref1->video);
         av_freep(&ref1->audio);
         av_freep(&ref1->buf->data[0]);
@@ -89,9 +89,9 @@ static void store_in_pool(AVFilterBufferRef *ref)
         pool->pic[POOL_SIZE-1] = NULL;
     }
 
-    for(i=0; i<POOL_SIZE; i++){
-        if(!pool->pic[i]){
-            pool->pic[i]= ref;
+    for (i = 0; i < POOL_SIZE; i++) {
+        if (!pool->pic[i]) {
+            pool->pic[i] = ref;
             pool->count++;
             break;
         }
@@ -102,8 +102,8 @@ void avfilter_unref_buffer(AVFilterBufferRef *ref)
 {
     if (!ref)
         return;
-    if (!(--ref->buf->refcount)){
-        if(!ref->buf->free){
+    if (!(--ref->buf->refcount)) {
+        if (!ref->buf->free) {
             store_in_pool(ref);
             return;
         }
diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c
index 9ee23e57b7..74ba599f9c 100644
--- a/libavfilter/defaults.c
+++ b/libavfilter/defaults.c
@@ -40,28 +40,28 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per
     uint8_t *data[4];
     int i;
     AVFilterBufferRef *picref = NULL;
-    AVFilterPool *pool= link->pool;
+    AVFilterPool *pool = link->pool;
 
-    if(pool) for(i=0; i<POOL_SIZE; i++){
-        picref= pool->pic[i];
-        if(picref && picref->buf->format == link->format && picref->buf->w == w && picref->buf->h == h){
-            AVFilterBuffer *pic= picref->buf;
-            pool->pic[i]= NULL;
+    if (pool) for (i = 0; i < POOL_SIZE; i++) {
+        picref = pool->pic[i];
+        if (picref && picref->buf->format == link->format && picref->buf->w == w && picref->buf->h == h) {
+            AVFilterBuffer *pic = picref->buf;
+            pool->pic[i] = NULL;
             pool->count--;
             picref->video->w = w;
             picref->video->h = h;
             picref->perms = perms | AV_PERM_READ;
-            picref->format= link->format;
+            picref->format = link->format;
             pic->refcount = 1;
             memcpy(picref->data,     pic->data,     sizeof(picref->data));
             memcpy(picref->linesize, pic->linesize, sizeof(picref->linesize));
             return picref;
         }
-    }else
+    } else
         pool = link->pool = av_mallocz(sizeof(AVFilterPool));
 
     // +2 is needed for swscaler, +16 to be SIMD-friendly
-    if ((i=av_image_alloc(data, linesize, w, h, link->format, 16)) < 0)
+    if ((i = av_image_alloc(data, linesize, w, h, link->format, 16)) < 0)
         return NULL;
 
     picref = avfilter_get_video_buffer_ref_from_arrays(data, linesize,
@@ -72,8 +72,8 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per
     }
     memset(data[0], 128, i);
 
-    picref->buf->priv= pool;
-    picref->buf->free= NULL;
+    picref->buf->priv = pool;
+    picref->buf->free = NULL;
 
     return picref;
 }
diff --git a/libavfilter/internal.h b/libavfilter/internal.h
index 333dcbff81..be1e9b08f2 100644
--- a/libavfilter/internal.h
+++ b/libavfilter/internal.h
@@ -31,7 +31,7 @@
 typedef struct AVFilterPool {
     AVFilterBufferRef *pic[POOL_SIZE];
     int count;
-}AVFilterPool;
+} AVFilterPool;
 
 /**
  * Check for the validity of graph.

From 0ff5cbedd2b6e813064fe4f8aab735162889037c Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 2 Jun 2011 16:27:56 +0200
Subject: [PATCH 507/830] lavfi: clarify the context of a comment in
 avfilter_default_get_video_buffer()

The comment is meant to be about the align parameter.
---
 libavfilter/defaults.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c
index 74ba599f9c..4a01b10b9c 100644
--- a/libavfilter/defaults.c
+++ b/libavfilter/defaults.c
@@ -60,7 +60,7 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per
     } else
         pool = link->pool = av_mallocz(sizeof(AVFilterPool));
 
-    // +2 is needed for swscaler, +16 to be SIMD-friendly
+    // align: +2 is needed for swscaler, +16 to be SIMD-friendly
     if ((i = av_image_alloc(data, linesize, w, h, link->format, 16)) < 0)
         return NULL;
 

From 6f1dd6f45a641ca7670c7b00fbeea42420b89ada Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 2 Jun 2011 16:33:25 +0200
Subject: [PATCH 508/830] lavfi: add braces around the block of an if()
 expression in avfilter_default_get_video_buffer

Clarify code layout.
---
 libavfilter/defaults.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c
index 4a01b10b9c..c83d500652 100644
--- a/libavfilter/defaults.c
+++ b/libavfilter/defaults.c
@@ -42,7 +42,8 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per
     AVFilterBufferRef *picref = NULL;
     AVFilterPool *pool = link->pool;
 
-    if (pool) for (i = 0; i < POOL_SIZE; i++) {
+    if (pool) {
+        for (i = 0; i < POOL_SIZE; i++) {
         picref = pool->pic[i];
         if (picref && picref->buf->format == link->format && picref->buf->w == w && picref->buf->h == h) {
             AVFilterBuffer *pic = picref->buf;
@@ -57,6 +58,7 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per
             memcpy(picref->linesize, pic->linesize, sizeof(picref->linesize));
             return picref;
         }
+        }
     } else
         pool = link->pool = av_mallocz(sizeof(AVFilterPool));
 

From 665e608c8d920d83216e1b8fd1445ee335528f13 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 2 Jun 2011 16:36:12 +0200
Subject: [PATCH 509/830] lavfi: reindent after the previous commit

---
 libavfilter/defaults.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c
index c83d500652..a994f36079 100644
--- a/libavfilter/defaults.c
+++ b/libavfilter/defaults.c
@@ -44,20 +44,20 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per
 
     if (pool) {
         for (i = 0; i < POOL_SIZE; i++) {
-        picref = pool->pic[i];
-        if (picref && picref->buf->format == link->format && picref->buf->w == w && picref->buf->h == h) {
-            AVFilterBuffer *pic = picref->buf;
-            pool->pic[i] = NULL;
-            pool->count--;
-            picref->video->w = w;
-            picref->video->h = h;
-            picref->perms = perms | AV_PERM_READ;
-            picref->format = link->format;
-            pic->refcount = 1;
-            memcpy(picref->data,     pic->data,     sizeof(picref->data));
-            memcpy(picref->linesize, pic->linesize, sizeof(picref->linesize));
-            return picref;
-        }
+            picref = pool->pic[i];
+            if (picref && picref->buf->format == link->format && picref->buf->w == w && picref->buf->h == h) {
+                AVFilterBuffer *pic = picref->buf;
+                pool->pic[i] = NULL;
+                pool->count--;
+                picref->video->w = w;
+                picref->video->h = h;
+                picref->perms = perms | AV_PERM_READ;
+                picref->format = link->format;
+                pic->refcount = 1;
+                memcpy(picref->data,     pic->data,     sizeof(picref->data));
+                memcpy(picref->linesize, pic->linesize, sizeof(picref->linesize));
+                return picref;
+            }
         }
     } else
         pool = link->pool = av_mallocz(sizeof(AVFilterPool));

From 9e66b64c360568a91faa84d0cda96ab93c467502 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 2 Jun 2011 16:09:24 +0200
Subject: [PATCH 510/830] vsrc_buffer: propagate error code in
 av_vsrc_buffer_add_frame()

Propagate the av_vsrc_buffer_add_video_buffer_ref() error code rather
than always returning 0.
---
 libavfilter/vsrc_buffer.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vsrc_buffer.c b/libavfilter/vsrc_buffer.c
index d1e6ffd57a..9ba7d4ee47 100644
--- a/libavfilter/vsrc_buffer.c
+++ b/libavfilter/vsrc_buffer.c
@@ -111,15 +111,16 @@ int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, AVFilter
 
 int av_vsrc_buffer_add_frame(AVFilterContext *buffer_src, const AVFrame *frame)
 {
+    int ret;
     AVFilterBufferRef *picref =
         avfilter_get_video_buffer_ref_from_frame(frame, AV_PERM_WRITE);
     if (!picref)
         return AVERROR(ENOMEM);
-    av_vsrc_buffer_add_video_buffer_ref(buffer_src, picref);
+    ret = av_vsrc_buffer_add_video_buffer_ref(buffer_src, picref);
     picref->buf->data[0] = NULL;
     avfilter_unref_buffer(picref);
 
-    return 0;
+    return ret;
 }
 #endif
 

From 6a9c85944427e3c4355bce67d7f677ec69527bff Mon Sep 17 00:00:00 2001
From: Alexander Strange <astrange@ithinksw.com>
Date: Thu, 2 Jun 2011 10:15:58 -0700
Subject: [PATCH 511/830] H264/MPEG frame-level multi-threading.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/error_resilience.c |  45 ++-
 libavcodec/h263dec.c          |   8 +
 libavcodec/h264.c             | 736 ++++++++++++++++++++++++++--------
 libavcodec/h264.h             |   2 +
 libavcodec/h264_direct.c      |  41 +-
 libavcodec/mpeg12.c           |  36 +-
 libavcodec/mpeg4videodec.c    |  20 +-
 libavcodec/mpegvideo.c        | 259 ++++++++++--
 libavcodec/mpegvideo.h        |  18 +-
 libavcodec/mpegvideo_enc.c    |  24 +-
 libavcodec/pthread.c          |   2 +-
 libavcodec/utils.c            |   4 +-
 tests/fate/h264.mak           | 204 +++++-----
 tests/ref/vsynth1/error       |   6 +-
 tests/ref/vsynth2/error       |   6 +-
 tests/regression-funcs.sh     |   2 +-
 16 files changed, 1058 insertions(+), 355 deletions(-)

diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index e7588a9867..aea0e15b34 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -32,6 +32,7 @@
 #include "mpegvideo.h"
 #include "h264.h"
 #include "rectangle.h"
+#include "thread.h"
 
 /*
  * H264 redefines mb_intra so it is not mistakely used (its uninitialized in h264)
@@ -428,8 +429,7 @@ int score_sum=0;
                     int best_score=256*256*256*64;
                     int best_pred=0;
                     const int mot_index= (mb_x + mb_y*mot_stride) * mot_step;
-                    int prev_x= s->current_picture.motion_val[0][mot_index][0];
-                    int prev_y= s->current_picture.motion_val[0][mot_index][1];
+                    int prev_x, prev_y, prev_ref;
 
                     if((mb_x^mb_y^pass)&1) continue;
 
@@ -527,11 +527,26 @@ skip_mean_and_median:
                     /* zero MV */
                     pred_count++;
 
+                    if (!fixed[mb_xy]) {
+                        if (s->avctx->codec_id == CODEC_ID_H264) {
+                            // FIXME
+                        } else {
+                            ff_thread_await_progress((AVFrame *) s->last_picture_ptr,
+                                                     mb_y, 0);
+                        }
+                        prev_x = s->last_picture.motion_val[0][mot_index][0];
+                        prev_y = s->last_picture.motion_val[0][mot_index][1];
+                        prev_ref = s->last_picture.ref_index[0][4*mb_xy];
+                    } else {
+                        prev_x = s->current_picture.motion_val[0][mot_index][0];
+                        prev_y = s->current_picture.motion_val[0][mot_index][1];
+                        prev_ref = s->current_picture.ref_index[0][4*mb_xy];
+                    }
+
                     /* last MV */
-                    mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index][0];
-                    mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index][1];
-                    ref         [pred_count]   = s->current_picture.ref_index[0][4*mb_xy];
-                    pred_count++;
+                    mv_predictor[pred_count][0]= prev_x;
+                    mv_predictor[pred_count][1]= prev_y;
+                    ref         [pred_count]   = prev_ref;
 
                     s->mv_dir = MV_DIR_FORWARD;
                     s->mb_intra=0;
@@ -662,6 +677,12 @@ static int is_intra_more_likely(MpegEncContext *s){
                 uint8_t *mb_ptr     = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
                 uint8_t *last_mb_ptr= s->last_picture.data   [0] + mb_x*16 + mb_y*16*s->linesize;
 
+                if (s->avctx->codec_id == CODEC_ID_H264) {
+                    // FIXME
+                } else {
+                    ff_thread_await_progress((AVFrame *) s->last_picture_ptr,
+                                             mb_y, 0);
+                }
                 is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr                    , s->linesize, 16);
                 is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16);
             }else{
@@ -681,6 +702,7 @@ void ff_er_frame_start(MpegEncContext *s){
 
     memset(s->error_status_table, MV_ERROR|AC_ERROR|DC_ERROR|VP_START|AC_END|DC_END|MV_END, s->mb_stride*s->mb_height*sizeof(uint8_t));
     s->error_count= 3*s->mb_num;
+    s->error_occurred = 0;
 }
 
 /**
@@ -720,7 +742,10 @@ void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int en
         s->error_count -= end_i - start_i + 1;
     }
 
-    if(status & (AC_ERROR|DC_ERROR|MV_ERROR)) s->error_count= INT_MAX;
+    if(status & (AC_ERROR|DC_ERROR|MV_ERROR)) {
+        s->error_occurred = 1;
+        s->error_count= INT_MAX;
+    }
 
     if(mask == ~0x7F){
         memset(&s->error_status_table[start_xy], 0, (end_xy - start_xy) * sizeof(uint8_t));
@@ -995,6 +1020,12 @@ void ff_er_frame_end(MpegEncContext *s){
                     int time_pp= s->pp_time;
                     int time_pb= s->pb_time;
 
+                    if (s->avctx->codec_id == CODEC_ID_H264) {
+                        //FIXME
+                    } else {
+                        ff_thread_await_progress((AVFrame *) s->next_picture_ptr,
+                                                 mb_y, 0);
+                    }
                     s->mv[0][0][0] = s->next_picture.motion_val[0][xy][0]*time_pb/time_pp;
                     s->mv[0][0][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp;
                     s->mv[1][0][0] = s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp;
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index e2627fa8e4..bc6d613be4 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -35,6 +35,7 @@
 #include "mpeg4video_parser.h"
 #include "msmpeg4.h"
 #include "vdpau_internal.h"
+#include "thread.h"
 #include "flv.h"
 #include "mpeg4video.h"
 
@@ -229,6 +230,7 @@ static int decode_slice(MpegEncContext *s){
                     if(++s->mb_x >= s->mb_width){
                         s->mb_x=0;
                         ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size);
+                        MPV_report_decode_progress(s);
                         s->mb_y++;
                     }
                     return 0;
@@ -249,6 +251,7 @@ static int decode_slice(MpegEncContext *s){
         }
 
         ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size);
+        MPV_report_decode_progress(s);
 
         s->mb_x= 0;
     }
@@ -609,6 +612,8 @@ retry:
     if(MPV_frame_start(s, avctx) < 0)
         return -1;
 
+    if (!s->divx_packed) ff_thread_finish_setup(avctx);
+
     if (CONFIG_MPEG4_VDPAU_DECODER && (s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)) {
         ff_vdpau_mpeg4_decode_picture(s, s->gb.buffer, s->gb.buffer_end - s->gb.buffer);
         goto frame_end;
@@ -639,8 +644,11 @@ retry:
             if(s->slice_height==0 || s->mb_x!=0 || (s->mb_y%s->slice_height)!=0 || get_bits_count(&s->gb) > s->gb.size_in_bits)
                 break;
         }else{
+            int prev_x=s->mb_x, prev_y=s->mb_y;
             if(ff_h263_resync(s)<0)
                 break;
+            if (prev_y * s->mb_width + prev_x < s->mb_y * s->mb_width + s->mb_x)
+                s->error_occurred = 1;
         }
 
         if(s->msmpeg4_version<4 && s->h263_pred)
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index eb873a4855..0aa923fdbb 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -36,6 +36,7 @@
 #include "golomb.h"
 #include "mathops.h"
 #include "rectangle.h"
+#include "thread.h"
 #include "vdpau_internal.h"
 #include "libavutil/avassert.h"
 
@@ -245,6 +246,141 @@ static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
     return 0;
 }
 
+/* Furthest row partition n reads from its `list` reference: max(|first row|, end row), margin included. */
+static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height, int y_offset, int list){
+    const int mv_y       = h->mv_cache[list][ scan8[n] ][1];  /* vertical MV; low two bits are the fraction */
+    const int row        = y_offset + (mv_y >> 2);            /* integer row of the block top */
+    const int extra_rows = (mv_y & 3) ? 2 : 0;                /* margin added on both sides for fractional MVs */
+    const int first_row  = row - extra_rows;
+    const int end_row    = row + height + extra_rows;
+    return FFMAX(abs(first_row), end_row);
+}
+
+/* For partition n, note in refs[list][ref] the lowest row needed from each reference it uses
+ * (keeping the maximum over calls) and count newly seen references in nrefs[list]. */
+static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
+                               int y_offset, int list0, int list1, int *nrefs){
+    MpegEncContext * const s = &h->s;
+    const int uses_list[2] = { list0, list1 };
+    int list;
+
+    // make y_offset picture-relative by adding the row of the current macroblock
+    y_offset += 16*(s->mb_y >> MB_FIELD);
+
+    for(list=0; list<2; list++){
+        int ref_n, lowest_row;
+        Picture *ref;
+
+        if(!uses_list[list])
+            continue;
+
+        ref_n = h->ref_cache[list][ scan8[n] ];
+        ref   = &h->ref_list[list][ref_n];
+
+        // Error resilience can place the current picture in the ref list;
+        // waiting on it would deadlock, so such entries are skipped.
+        // A field with a different picture structure is still waited on.
+        if(ref->thread_opaque == s->current_picture.thread_opaque &&
+           (ref->reference&3) == s->picture_structure)
+            continue;
+
+        lowest_row = get_lowest_part_list_y(h, ref, n, height, y_offset, list);
+        if(refs[list][ref_n] < 0)
+            nrefs[list] += 1;
+        refs[list][ref_n] = FFMAX(refs[list][ref_n], lowest_row);
+    }
+}
+
+/**
+ * Block until every reference-picture row that motion compensation of the
+ * current macroblock (h->mb_xy) will read has been reported as decoded.
+ * @param h the H264 context
+ */
+static void await_references(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= h->mb_xy;
+    const int mb_type= s->current_picture.mb_type[mb_xy];
+    int refs[2][48];      // refs[list][ref]: lowest row needed from that reference, -1 = not used
+    int nrefs[2] = {0};   // per list: how many entries of refs[list] are >= 0
+    int ref, list;
+
+    memset(refs, -1, sizeof(refs)); // all bytes 0xFF, so every int entry reads back as -1
+
+    if(IS_16X16(mb_type)){ // one get_lowest_part_y() call per partition: (index n, height, y offset in the MB)
+        get_lowest_part_y(h, refs, 0, 16, 0,
+                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
+    }else if(IS_16X8(mb_type)){ // upper and lower half
+        get_lowest_part_y(h, refs, 0, 8, 0,
+                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
+        get_lowest_part_y(h, refs, 8, 8, 8,
+                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
+    }else if(IS_8X16(mb_type)){ // two full-height partitions, both starting at row 0
+        get_lowest_part_y(h, refs, 0, 16, 0,
+                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
+        get_lowest_part_y(h, refs, 4, 16, 0,
+                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
+    }else{
+        int i;
+
+        assert(IS_8X8(mb_type));
+
+        for(i=0; i<4; i++){ // the four 8x8 sub-macroblocks
+            const int sub_mb_type= h->sub_mb_type[i];
+            const int n= 4*i;
+            int y_offset= (i&2)<<2; // 0 for i = 0,1 and 8 for i = 2,3
+
+            if(IS_SUB_8X8(sub_mb_type)){
+                get_lowest_part_y(h, refs, n  , 8, y_offset,
+                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+            }else if(IS_SUB_8X4(sub_mb_type)){
+                get_lowest_part_y(h, refs, n  , 4, y_offset,
+                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
+                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+            }else if(IS_SUB_4X8(sub_mb_type)){
+                get_lowest_part_y(h, refs, n  , 8, y_offset,
+                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+                get_lowest_part_y(h, refs, n+1, 8, y_offset,
+                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+            }else{
+                int j;
+                assert(IS_SUB_4X4(sub_mb_type));
+                for(j=0; j<4; j++){
+                    int sub_y_offset= y_offset + 2*(j&2); // +4 for the two lower 4x4 blocks (j = 2,3)
+                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
+                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+                }
+            }
+        }
+    }
+
+    for(list=h->list_count-1; list>=0; list--){ // wait on the recorded rows, highest list first
+        for(ref=0; ref<48 && nrefs[list]; ref++){ // stop once all recorded refs of this list are handled
+            int row = refs[list][ref];
+            if(row >= 0){
+                Picture *ref_pic = &h->ref_list[list][ref];
+                int ref_field = ref_pic->reference - 1; // used below as field offset / field index
+                int ref_field_picture = ref_pic->field_picture;
+                int pic_height = 16*s->mb_height >> ref_field_picture; // row count of the reference; halved if it is a field picture
+
+                row <<= MB_MBAFF; // NOTE(review): presumably rescales rows of field MBs in MBAFF frames - confirm
+                nrefs[list]--;
+
+                // every wait below clamps the requested row to pic_height-1
+                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
+                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
+                }else if(FIELD_PICTURE){ // field referencing a field picture: wait on index ref_field
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
+                }else{ // frame referencing a frame
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
+                }
+            }
+        }
+    }
+}
+
 #if 0
 /**
  * DCT transforms the 16 dc values.
@@ -518,6 +654,8 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
 
     assert(IS_INTER(mb_type));
 
+    if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
+        await_references(h);
     prefetch_motion(h, 0, pixel_shift);
 
     if(IS_16X16(mb_type)){
@@ -626,6 +764,7 @@ hl_motion_fn(1, 16);
 static void free_tables(H264Context *h, int free_rbsp){
     int i;
     H264Context *hx;
+
     av_freep(&h->intra4x4_pred_mode);
     av_freep(&h->chroma_pred_mode_table);
     av_freep(&h->cbp_table);
@@ -898,7 +1037,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
     h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
 
     h->thread_context[0] = h;
-    h->outputed_poc = INT_MIN;
+    h->outputed_poc = h->next_outputed_poc = INT_MIN;
     h->prev_poc_msb= 1<<16;
     h->x264_build = -1;
     ff_h264_reset_sei(h);
@@ -921,10 +1060,135 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
     return 0;
 }
 
+#define IN_RANGE(a, b, size) (((const char*)(a) >= (const char*)(b)) && ((const char*)(a) < ((const char*)(b)+(size))))
+static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
+{   /* Fill to[0..count) with from[] translated by REBASE_PICTURE() from old_base to new_base. */
+    int i;
+    /* IN_RANGE() takes a size in bytes, so it compares byte addresses instead of scaling by the pointee size. */
+    for (i=0; i<count; i++){
+        assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
+                IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
+                !from[i]));
+        to[i] = REBASE_PICTURE(from[i], new_base, old_base);
+    }
+}
+
+static void copy_parameter_set(void **to, void **from, int count, int size)
+{   /* Make to[] mirror from[]: free slots absent in from[], allocate missing ones, copy `size` bytes each. */
+    int i;
+
+    for (i=0; i<count; i++){
+        if (to[i] && !from[i]) av_freep(&to[i]);
+        else if (from[i] && !to[i]) to[i] = av_malloc(size);
+        /* av_malloc() can fail: leave the slot NULL rather than copying through a null pointer */
+        if (from[i] && to[i]) memcpy(to[i], from[i], size);
+    }
+}
+
+/* For a context flagged is_copy, zero (without freeing) its sps_buffers/pps_buffers tables; always returns 0. */
+static int decode_init_thread_copy(AVCodecContext *avctx){
+    H264Context * const ctx = avctx->priv_data;
+    if (avctx->is_copy) {
+        memset(ctx->sps_buffers, 0, sizeof(ctx->sps_buffers));
+        memset(ctx->pps_buffers, 0, sizeof(ctx->pps_buffers));
+    }
+    return 0;
+}
+
+#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field) /* copies [start_field, end_field); end_field itself is excluded */
+static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
+    H264Context *h= dst->priv_data, *h1= src->priv_data;
+    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
+    int inited = s->context_initialized, err; // sampled before ff_mpeg_update_thread_context() runs
+    int i;
+    // Bring dst's H264 state up to date with src's. Returns 0, or the error from ff_mpeg_update_thread_context().
+    if(dst == src || !s1->context_initialized) return 0; // nothing to copy
+
+    err = ff_mpeg_update_thread_context(dst, src);
+    if(err) return err;
+
+    //FIXME handle width/height changing
+    if(!inited){ // first update of this context: clone everything, then give it private buffers
+        for(i = 0; i < MAX_SPS_COUNT; i++)
+            av_freep(h->sps_buffers + i);
+
+        for(i = 0; i < MAX_PPS_COUNT; i++)
+            av_freep(h->pps_buffers + i);
+
+        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc
+        memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); // drop h1's pointers brought in by the memcpy;
+        memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); // copy_parameter_set() below allocates private copies
+        ff_h264_alloc_tables(h); // NOTE(review): return value is not checked here
+        context_init(h);         // NOTE(review): return value is not checked here
+
+        for(i=0; i<2; i++){ // the memcpy copied h1's rbsp buffer pointers; reset them
+            h->rbsp_buffer[i] = NULL;
+            h->rbsp_buffer_size[i] = 0;
+        }
+
+        h->thread_context[0] = h;
+
+        // frame_start may not be called for the next thread (if it's decoding a bottom field)
+        // so this has to be allocated here
+        h->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); // NOTE(review): result not checked here
+
+        s->dsp.clear_blocks(h->mb);
+    }
+
+    //extradata/NAL handling
+    h->is_avc          = h1->is_avc;
+
+    //SPS/PPS
+    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
+    h->sps             = h1->sps;
+    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
+    h->pps             = h1->pps;
+
+    //Dequantization matrices
+    //FIXME these are big - can they be only copied when PPS changes?
+    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);
+
+    for(i=0; i<6; i++) // re-point each table into h's own buffer at the offset it has in h1
+        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);
+
+    for(i=0; i<2; i++) // same rebasing for the 8x8 tables
+        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);
+
+    h->dequant_coeff_pps = h1->dequant_coeff_pps;
+
+    //POC timing
+    copy_fields(h, h1, poc_lsb, redundant_pic_count);
+
+    //reference lists
+    copy_fields(h, h1, ref_count, list_count);
+    copy_fields(h, h1, ref_list,  intra_gb);
+    copy_fields(h, h1, short_ref, cabac_init_idc);
+
+    // the Picture pointers just copied still refer to s1; translate them to s
+    copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
+    copy_picture_range(h->long_ref,    h1->long_ref,    32, s, s1);
+    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);
+
+    h->last_slice_type = h1->last_slice_type;
+
+    if(!s->current_picture_ptr) return 0; // no current picture: skip the bookkeeping below
+
+    // same bookkeeping field_end() performs when in_setup is set or frame threading is inactive
+    if(!s->dropable) {
+        ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
+        h->prev_poc_msb     = h->poc_msb;
+        h->prev_poc_lsb     = h->poc_lsb;
+    }
+    h->prev_frame_num_offset= h->frame_num_offset;
+    h->prev_frame_num       = h->frame_num;
+    h->outputed_poc         = h->next_outputed_poc;
+
+    return 0;
+}
+
 int ff_h264_frame_start(H264Context *h){
     MpegEncContext * const s = &h->s;
     int i;
     const int pixel_shift = h->pixel_shift;
+    int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;
 
     if(MPV_frame_start(s, s->avctx) < 0)
         return -1;
@@ -953,7 +1217,7 @@ int ff_h264_frame_start(H264Context *h){
 
     /* can't be in alloc_tables because linesize isn't known there.
      * FIXME: redo bipred weight to not require extra buffer? */
-    for(i = 0; i < s->avctx->thread_count; i++)
+    for(i = 0; i < thread_count; i++)
         if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
 
@@ -973,11 +1237,173 @@ int ff_h264_frame_start(H264Context *h){
 
     s->current_picture_ptr->field_poc[0]=
     s->current_picture_ptr->field_poc[1]= INT_MAX;
+
+    h->next_output_pic = NULL;
+
     assert(s->current_picture_ptr->long_ref==0);
 
     return 0;
 }
 
+/**
+  * Run setup operations that must be run after slice header decoding.
+  * This includes finding the next displayed frame (stored in h->next_output_pic).
+  * Returns early if that picture is already set or a field POC is still INT_MAX.
+  * @param h h264 master context
+  * @param setup_finished enough NALs have been read that we can call
+  * ff_thread_finish_setup()
+  */
+static void decode_postinit(H264Context *h, int setup_finished){
+    MpegEncContext * const s = &h->s;
+    Picture *out = s->current_picture_ptr;
+    Picture *cur = s->current_picture_ptr;
+    int i, pics, out_of_order, out_idx;
+
+    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
+    s->current_picture_ptr->pict_type= s->pict_type;
+
+    if (h->next_output_pic) return; // an output picture was already chosen
+
+    if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { // a field POC is still unset
+        //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
+        //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
+        //to find this yet, so we assume the worst for now.
+        //if (setup_finished)
+        //    ff_thread_finish_setup(s->avctx);
+        return;
+    }
+
+    cur->interlaced_frame = 0;
+    cur->repeat_pict = 0;
+
+    /* Signal interlacing information externally. */
+    /* Prioritize picture timing SEI information over used decoding process if it exists. */
+
+    if(h->sps.pic_struct_present_flag){
+        switch (h->sei_pic_struct)
+        {
+        case SEI_PIC_STRUCT_FRAME:
+            break;
+        case SEI_PIC_STRUCT_TOP_FIELD:
+        case SEI_PIC_STRUCT_BOTTOM_FIELD:
+            cur->interlaced_frame = 1;
+            break;
+        case SEI_PIC_STRUCT_TOP_BOTTOM:
+        case SEI_PIC_STRUCT_BOTTOM_TOP:
+            if (FIELD_OR_MBAFF_PICTURE)
+                cur->interlaced_frame = 1;
+            else
+                // try to flag soft telecine progressive
+                cur->interlaced_frame = h->prev_interlaced_frame;
+            break;
+        case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
+        case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
+            // Signal the possibility of telecined film externally (pic_struct 5,6)
+            // From these hints, let the applications decide if they apply deinterlacing.
+            cur->repeat_pict = 1;
+            break;
+        case SEI_PIC_STRUCT_FRAME_DOUBLING:
+            // Force progressive here, as doubling interlaced frame is a bad idea.
+            cur->repeat_pict = 2;
+            break;
+        case SEI_PIC_STRUCT_FRAME_TRIPLING:
+            cur->repeat_pict = 4;
+            break;
+        }
+
+        if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP) // ct_type overrides the guess above
+            cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
+    }else{
+        /* Derive interlacing flag from used decoding process. */
+        cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
+    }
+    h->prev_interlaced_frame = cur->interlaced_frame;
+
+    if (cur->field_poc[0] != cur->field_poc[1]){
+        /* Derive top_field_first from field pocs. */
+        cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
+    }else{
+        if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
+            /* Use picture timing SEI information. Even if it is information from a past frame, it is better than nothing. */
+            if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
+              || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
+                cur->top_field_first = 1;
+            else
+                cur->top_field_first = 0;
+        }else{
+            /* Most likely progressive */
+            cur->top_field_first = 0;
+        }
+    }
+
+    //FIXME do something with unavailable reference frames
+
+    /* Sort B-frames into display order */
+
+    if(h->sps.bitstream_restriction_flag
+       && s->avctx->has_b_frames < h->sps.num_reorder_frames){ // raise the delay to what the SPS announces
+        s->avctx->has_b_frames = h->sps.num_reorder_frames;
+        s->low_delay = 0;
+    }
+
+    if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
+       && !h->sps.bitstream_restriction_flag){ // strict mode without SPS info: use the maximum delay
+        s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
+        s->low_delay= 0;
+    }
+
+    pics = 0;
+    while(h->delayed_pic[pics]) pics++; // count the pictures queued in the NULL-terminated delayed_pic[]
+
+    assert(pics <= MAX_DELAYED_PIC_COUNT);
+
+    h->delayed_pic[pics++] = cur; // append the current picture to the queue
+    if(cur->reference == 0)
+        cur->reference = DELAYED_PIC_REF;
+
+    out = h->delayed_pic[0]; // candidate: lowest POC among the entries before the next key frame / mmco reset
+    out_idx = 0;
+    for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
+        if(h->delayed_pic[i]->poc < out->poc){
+            out = h->delayed_pic[i];
+            out_idx = i;
+        }
+    if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
+        h->next_outputed_poc= INT_MIN;
+    out_of_order = out->poc < h->next_outputed_poc; // candidate precedes the POC last chosen for output
+
+    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
+        { } // delay already covers the SPS-announced reordering: never grow it
+    else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
+       || (s->low_delay &&
+        ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2)
+         || cur->pict_type == AV_PICTURE_TYPE_B)))
+    {
+        s->low_delay = 0;
+        s->avctx->has_b_frames++; // grow the output delay by one picture
+    }
+
+    if(out_of_order || pics > s->avctx->has_b_frames){ // take `out` off the queue
+        out->reference &= ~DELAYED_PIC_REF;
+        out->owner2 = s; // for frame threading, the owner must be the second field's thread
+                         // or else the first thread can release the picture and reuse it unsafely
+        for(i=out_idx; h->delayed_pic[i]; i++) // shift the later entries (and the NULL terminator) down
+            h->delayed_pic[i] = h->delayed_pic[i+1];
+    }
+    if(!out_of_order && pics > s->avctx->has_b_frames){ // `out` becomes the picture to output
+        h->next_output_pic = out;
+        if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
+            h->next_outputed_poc = INT_MIN;
+        } else
+            h->next_outputed_poc = out->poc;
+    }else{
+        av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
+    }
+
+    if (setup_finished)
+        ff_thread_finish_setup(s->avctx);
+}
+
 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
     MpegEncContext * const s = &h->s;
     uint8_t *top_border;
@@ -1573,7 +1999,7 @@ static void flush_dpb(AVCodecContext *avctx){
             h->delayed_pic[i]->reference= 0;
         h->delayed_pic[i]= NULL;
     }
-    h->outputed_poc= INT_MIN;
+    h->outputed_poc=h->next_outputed_poc= INT_MIN;
     h->prev_interlaced_frame = 1;
     idr(h);
     if(h->s.current_picture_ptr)
@@ -1697,24 +2123,28 @@ static void init_scan_tables(H264Context *h){
     }
 }
 
-static void field_end(H264Context *h){
+static void field_end(H264Context *h, int in_setup){
     MpegEncContext * const s = &h->s;
     AVCodecContext * const avctx= s->avctx;
     s->mb_y= 0;
 
-    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
-    s->current_picture_ptr->pict_type= s->pict_type;
+    if (!in_setup && !s->dropable)
+        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1,
+                                 s->picture_structure==PICT_BOTTOM_FIELD);
 
     if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
         ff_vdpau_h264_set_reference_frames(s);
 
-    if(!s->dropable) {
-        ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
-        h->prev_poc_msb= h->poc_msb;
-        h->prev_poc_lsb= h->poc_lsb;
+    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
+        if(!s->dropable) {
+            ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
+            h->prev_poc_msb= h->poc_msb;
+            h->prev_poc_lsb= h->poc_lsb;
+        }
+        h->prev_frame_num_offset= h->frame_num_offset;
+        h->prev_frame_num= h->frame_num;
+        h->outputed_poc = h->next_outputed_poc;
     }
-    h->prev_frame_num_offset= h->frame_num_offset;
-    h->prev_frame_num= h->frame_num;
 
     if (avctx->hwaccel) {
         if (avctx->hwaccel->end_frame(avctx) < 0)
@@ -1831,7 +2261,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
 
     if(first_mb_in_slice == 0){ //FIXME better field boundary detection
         if(h0->current_slice && FIELD_PICTURE){
-            field_end(h);
+            field_end(h, 1);
         }
 
         h0->current_slice = 0;
@@ -1900,8 +2330,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     if (s->context_initialized
         && (   s->width != s->avctx->width || s->height != s->avctx->height
             || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
-        if(h != h0)
+        if(h != h0) {
+            av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);
             return -1;   // width / height changed during parallelized decoding
+        }
         free_tables(h, 0);
         flush_dpb(s->avctx);
         MPV_common_end(s);
@@ -1960,22 +2392,27 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         init_scan_tables(h);
         ff_h264_alloc_tables(h);
 
-        for(i = 1; i < s->avctx->thread_count; i++) {
-            H264Context *c;
-            c = h->thread_context[i] = av_malloc(sizeof(H264Context));
-            memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
-            memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
-            c->h264dsp = h->h264dsp;
-            c->sps = h->sps;
-            c->pps = h->pps;
-            c->pixel_shift = h->pixel_shift;
-            init_scan_tables(c);
-            clone_tables(c, h, i);
-        }
-
-        for(i = 0; i < s->avctx->thread_count; i++)
-            if(context_init(h->thread_context[i]) < 0)
+        if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
+            if (context_init(h) < 0)
                 return -1;
+        } else {
+            for(i = 1; i < s->avctx->thread_count; i++) {
+                H264Context *c;
+                c = h->thread_context[i] = av_malloc(sizeof(H264Context));
+                memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
+                memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
+                c->h264dsp = h->h264dsp;
+                c->sps = h->sps;
+                c->pps = h->pps;
+                c->pixel_shift = h->pixel_shift;
+                init_scan_tables(c);
+                clone_tables(c, h, i);
+            }
+
+            for(i = 0; i < s->avctx->thread_count; i++)
+                if(context_init(h->thread_context[i]) < 0)
+                    return -1;
+        }
     }
 
     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
@@ -1996,6 +2433,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
 
     if(h0->current_slice == 0){
+        if(h->frame_num != h->prev_frame_num &&
+          (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num) < (h->frame_num - h->sps.ref_frame_count))
+            h->prev_frame_num = h->frame_num - h->sps.ref_frame_count - 1;
+
         while(h->frame_num !=  h->prev_frame_num &&
               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
             Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
@@ -2005,6 +2446,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
             h->prev_frame_num++;
             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
             s->current_picture_ptr->frame_num= h->prev_frame_num;
+            ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
+            ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
             ff_generate_sliding_window_mmcos(h);
             ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
             /* Error concealment: if a ref is missing, copy the previous ref in its place.
@@ -2064,9 +2507,13 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
             s0->first_field = FIELD_PICTURE;
         }
 
-        if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
-            s0->first_field = 0;
-            return -1;
+        if(!FIELD_PICTURE || s0->first_field) {
+            if (ff_h264_frame_start(h) < 0) {
+                s0->first_field = 0;
+                return -1;
+            }
+        } else {
+            ff_release_unused_pictures(s, 0);
         }
     }
     if(h != h0)
@@ -2309,7 +2756,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
                           +(h->ref_list[j][i].reference&3);
     }
 
-    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
+    //FIXME: fix draw_edges+PAFF+frame threads
+    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
 
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
@@ -2631,6 +3079,40 @@ static void predict_field_decoding_flag(H264Context *h){
     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
 }
 
+/**
+ * Pass the rows completed by the MB row just decoded to ff_draw_horiz_band()
+ * and, unless the picture is dropable, report them as decoding progress.
+ */
+static void decode_finish_row(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int pic_rows = 16*s->mb_height >> FIELD_PICTURE;
+    const int margin   = (16 + 4) << FRAME_MBAFF; /* rows held back while the deblocking filter is on */
+    int first = 16*(s->mb_y >> FIELD_PICTURE);
+    int rows  = 16 << FRAME_MBAFF;
+
+    if (h->deblocking_filter) {
+        /* once the band reaches the picture bottom, also emit the rows held back so far */
+        if (first + rows >= pic_rows)
+            rows += margin;
+        first -= margin;
+    }
+
+    /* nothing to emit: band starts past the picture or ends above emu_edge_height */
+    if (first >= pic_rows || first + rows < h->emu_edge_height)
+        return;
+
+    rows = FFMIN(rows, pic_rows - first);
+    if (first < h->emu_edge_height) {
+        rows += first;
+        first = 0;
+    }
+
+    ff_draw_horiz_band(s, first, rows);
+    if (!s->dropable)
+        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, first + rows - 1,
+                                  s->picture_structure==PICT_BOTTOM_FIELD);
+}
+
 static int decode_slice(struct AVCodecContext *avctx, void *arg){
     H264Context *h = *(void**)arg;
     MpegEncContext * const s = &h->s;
@@ -2686,7 +3168,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
             if( ++s->mb_x >= s->mb_width ) {
                 loop_filter(h, lf_x_start, s->mb_x);
                 s->mb_x = lf_x_start = 0;
-                ff_draw_horiz_band(s, 16*s->mb_y, 16);
+                decode_finish_row(h);
                 ++s->mb_y;
                 if(FIELD_OR_MBAFF_PICTURE) {
                     ++s->mb_y;
@@ -2726,7 +3208,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
             if(++s->mb_x >= s->mb_width){
                 loop_filter(h, lf_x_start, s->mb_x);
                 s->mb_x = lf_x_start = 0;
-                ff_draw_horiz_band(s, 16*s->mb_y, 16);
+                decode_finish_row(h);
                 ++s->mb_y;
                 if(FIELD_OR_MBAFF_PICTURE) {
                     ++s->mb_y;
@@ -2855,12 +3337,15 @@ static void execute_decode_slices(H264Context *h, int context_count){
 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
     MpegEncContext * const s = &h->s;
     AVCodecContext * const avctx= s->avctx;
-    int buf_index=0;
     H264Context *hx; ///< thread context
-    int context_count = 0;
-    int next_avc= h->is_avc ? 0 : buf_size;
+    int buf_index;
+    int context_count;
+    int next_avc;
+    int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
+    int nals_needed=0; ///< number of NALs that need decoding before the next frame thread starts
+    int nal_index;
 
-    h->max_contexts = avctx->thread_count;
+    h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
         h->current_slice = 0;
         if (!s->first_field)
@@ -2868,6 +3353,11 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
         ff_h264_reset_sei(h);
     }
 
+    for(;pass <= 1;pass++){
+        buf_index = 0;
+        context_count = 0;
+        next_avc = h->is_avc ? 0 : buf_size;
+        nal_index = 0;
     for(;;){
         int consumed;
         int dst_length;
@@ -2926,6 +3416,19 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
         }
 
         buf_index += consumed;
+        nal_index++;
+
+        if(pass == 0) {
+            // packets can sometimes contain multiple PPS/SPS
+            // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
+            // if so, when frame threading we can't start the next thread until we've read all of them
+            switch (hx->nal_unit_type) {
+                case NAL_SPS:
+                case NAL_PPS:
+                    nals_needed = nal_index;
+            }
+            continue;
+        }
 
         //FIXME do not discard SEI id
         if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0)
@@ -2949,16 +3452,21 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
             if((err = decode_slice_header(hx, h)))
                break;
 
+            s->current_picture_ptr->key_frame |=
+                    (hx->nal_unit_type == NAL_IDR_SLICE) ||
+                    (h->sei_recovery_frame_cnt >= 0);
+
             if (h->current_slice == 1) {
+                if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
+                    decode_postinit(h, nal_index >= nals_needed);
+                }
+
                 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
                     return -1;
                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
                     ff_vdpau_h264_picture_start(s);
             }
 
-            s->current_picture_ptr->key_frame |=
-                    (hx->nal_unit_type == NAL_IDR_SLICE) ||
-                    (h->sei_recovery_frame_cnt >= 0);
             if(hx->redundant_pic_count==0
                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
@@ -3066,6 +3574,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
             goto again;
         }
     }
+    }
     if(context_count)
         execute_decode_slices(h, context_count);
     return buf_index;
@@ -3101,6 +3610,8 @@ static int decode_frame(AVCodecContext *avctx,
         Picture *out;
         int i, out_idx;
 
+        s->current_picture_ptr = NULL;
+
 //FIXME factorize this with the output code below
         out = h->delayed_pic[0];
         out_idx = 0;
@@ -3138,143 +3649,18 @@ static int decode_frame(AVCodecContext *avctx,
     }
 
     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
-        Picture *out = s->current_picture_ptr;
-        Picture *cur = s->current_picture_ptr;
-        int i, pics, out_of_order, out_idx;
 
-        field_end(h);
+        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);
 
-        if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
+        field_end(h, 0);
+
+        if (!h->next_output_pic) {
             /* Wait for second field. */
             *data_size = 0;
 
         } else {
-            cur->interlaced_frame = 0;
-            cur->repeat_pict = 0;
-
-            /* Signal interlacing information externally. */
-            /* Prioritize picture timing SEI information over used decoding process if it exists. */
-
-            if(h->sps.pic_struct_present_flag){
-                switch (h->sei_pic_struct)
-                {
-                case SEI_PIC_STRUCT_FRAME:
-                    break;
-                case SEI_PIC_STRUCT_TOP_FIELD:
-                case SEI_PIC_STRUCT_BOTTOM_FIELD:
-                    cur->interlaced_frame = 1;
-                    break;
-                case SEI_PIC_STRUCT_TOP_BOTTOM:
-                case SEI_PIC_STRUCT_BOTTOM_TOP:
-                    if (FIELD_OR_MBAFF_PICTURE)
-                        cur->interlaced_frame = 1;
-                    else
-                        // try to flag soft telecine progressive
-                        cur->interlaced_frame = h->prev_interlaced_frame;
-                    break;
-                case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
-                case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
-                    // Signal the possibility of telecined film externally (pic_struct 5,6)
-                    // From these hints, let the applications decide if they apply deinterlacing.
-                    cur->repeat_pict = 1;
-                    break;
-                case SEI_PIC_STRUCT_FRAME_DOUBLING:
-                    // Force progressive here, as doubling interlaced frame is a bad idea.
-                    cur->repeat_pict = 2;
-                    break;
-                case SEI_PIC_STRUCT_FRAME_TRIPLING:
-                    cur->repeat_pict = 4;
-                    break;
-                }
-
-                if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
-                    cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
-            }else{
-                /* Derive interlacing flag from used decoding process. */
-                cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
-            }
-            h->prev_interlaced_frame = cur->interlaced_frame;
-
-            if (cur->field_poc[0] != cur->field_poc[1]){
-                /* Derive top_field_first from field pocs. */
-                cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
-            }else{
-                if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
-                    /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
-                    if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
-                      || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
-                        cur->top_field_first = 1;
-                    else
-                        cur->top_field_first = 0;
-                }else{
-                    /* Most likely progressive */
-                    cur->top_field_first = 0;
-                }
-            }
-
-        //FIXME do something with unavailable reference frames
-
-            /* Sort B-frames into display order */
-
-            if(h->sps.bitstream_restriction_flag
-               && s->avctx->has_b_frames < h->sps.num_reorder_frames){
-                s->avctx->has_b_frames = h->sps.num_reorder_frames;
-                s->low_delay = 0;
-            }
-
-            if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
-               && !h->sps.bitstream_restriction_flag){
-                s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
-                s->low_delay= 0;
-            }
-
-            pics = 0;
-            while(h->delayed_pic[pics]) pics++;
-
-            assert(pics <= MAX_DELAYED_PIC_COUNT);
-
-            h->delayed_pic[pics++] = cur;
-            if(cur->reference == 0)
-                cur->reference = DELAYED_PIC_REF;
-
-            out = h->delayed_pic[0];
-            out_idx = 0;
-            for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
-                if(h->delayed_pic[i]->poc < out->poc){
-                    out = h->delayed_pic[i];
-                    out_idx = i;
-                }
-            if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
-                h->outputed_poc= INT_MIN;
-            out_of_order = out->poc < h->outputed_poc;
-
-            if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
-                { }
-            else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
-               || (s->low_delay &&
-                ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
-                 || cur->pict_type == AV_PICTURE_TYPE_B)))
-            {
-                s->low_delay = 0;
-                s->avctx->has_b_frames++;
-            }
-
-            if(out_of_order || pics > s->avctx->has_b_frames){
-                out->reference &= ~DELAYED_PIC_REF;
-                for(i=out_idx; h->delayed_pic[i]; i++)
-                    h->delayed_pic[i] = h->delayed_pic[i+1];
-            }
-            if(!out_of_order && pics > s->avctx->has_b_frames){
-                *data_size = sizeof(AVFrame);
-
-                if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
-                    h->outputed_poc = INT_MIN;
-                } else
-                    h->outputed_poc = out->poc;
-                *pict= *(AVFrame*)out;
-            }else{
-                av_log(avctx, AV_LOG_DEBUG, "no picture\n");
-            }
+            *data_size = sizeof(AVFrame);
+            *pict = *(AVFrame*)h->next_output_pic;
         }
     }
 
@@ -3534,9 +3920,11 @@ AVCodec ff_h264_decoder = {
     ff_h264_decode_end,
     decode_frame,
     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
-        CODEC_CAP_SLICE_THREADS,
+        CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
     .flush= flush_dpb,
     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
+    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
+    .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
     .profiles = NULL_IF_CONFIG_SMALL(profiles),
 };
 
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 035927a09a..5c275e2de0 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -506,7 +506,9 @@ typedef struct H264Context{
     Picture *long_ref[32];
     Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture
     Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size?
+    Picture *next_output_pic;
     int outputed_poc;
+    int next_outputed_poc;
 
     /**
      * memory management control operations buffer.
diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index 3c7f57aa0c..a7e6853b5c 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c
@@ -31,6 +31,7 @@
 #include "mpegvideo.h"
 #include "h264.h"
 #include "rectangle.h"
+#include "thread.h"
 
 //#undef NDEBUG
 #include <assert.h>
@@ -126,7 +127,7 @@ void ff_h264_direct_ref_list_init(H264Context * const h){
         h->col_parity= (FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc));
         ref1sidx=sidx= h->col_parity;
     }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){ // FL -> FL & differ parity
-        h->col_fieldoff= s->mb_stride*(2*(h->ref_list[1][0].reference) - 3);
+        h->col_fieldoff= 2*(h->ref_list[1][0].reference) - 3;
     }
 
     if(cur->pict_type != AV_PICTURE_TYPE_B || h->direct_spatial_mv_pred)
@@ -140,11 +141,27 @@ void ff_h264_direct_ref_list_init(H264Context * const h){
     }
 }
 
+static void await_reference_mb_row(H264Context * const h, Picture *ref, int mb_y)
+{
+    int ref_field = ref->reference - 1;
+    int ref_field_picture = ref->field_picture;
+    int ref_height = 16*h->s.mb_height >> ref_field_picture;
+
+    if(!HAVE_PTHREADS || !(h->s.avctx->active_thread_type&FF_THREAD_FRAME))
+        return;
+
+    //FIXME it can be safe to access mb stuff
+    //even if pixels aren't deblocked yet
+
+    ff_thread_await_progress((AVFrame*)ref, FFMIN(16*mb_y >> ref_field_picture, ref_height-1),
+                             ref_field_picture && ref_field);
+}
+
 static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){
     MpegEncContext * const s = &h->s;
     int b8_stride = 2;
     int b4_stride = h->b_stride;
-    int mb_xy = h->mb_xy;
+    int mb_xy = h->mb_xy, mb_y = s->mb_y;
     int mb_type_col[2];
     const int16_t (*l1mv0)[2], (*l1mv1)[2];
     const int8_t *l1ref0, *l1ref1;
@@ -157,6 +174,8 @@ static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){
 
     assert(h->ref_list[1][0].reference&3);
 
+    await_reference_mb_row(h, &h->ref_list[1][0], s->mb_y + !!IS_INTERLACED(*mb_type));
+
 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
 
 
@@ -217,14 +236,17 @@ static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){
 
     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
+            mb_y = (s->mb_y&~1) + h->col_parity;
             mb_xy= s->mb_x + ((s->mb_y&~1) + h->col_parity)*s->mb_stride;
             b8_stride = 0;
         }else{
-            mb_xy += h->col_fieldoff; // non zero for FL -> FL & differ parity
+            mb_y  += h->col_fieldoff;
+            mb_xy += s->mb_stride*h->col_fieldoff; // non zero for FL -> FL & differ parity
         }
         goto single_col;
     }else{                                               // AFL/AFR/FR/FL -> AFR/FR
         if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
+            mb_y = s->mb_y&~1;
             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
@@ -260,6 +282,8 @@ single_col:
         }
     }
 
+    await_reference_mb_row(h, &h->ref_list[1][0], mb_y);
+
     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
     l1ref0 = &h->ref_list[1][0].ref_index [0][4*mb_xy];
@@ -384,7 +408,7 @@ static void pred_temp_direct_motion(H264Context * const h, int *mb_type){
     MpegEncContext * const s = &h->s;
     int b8_stride = 2;
     int b4_stride = h->b_stride;
-    int mb_xy = h->mb_xy;
+    int mb_xy = h->mb_xy, mb_y = s->mb_y;
     int mb_type_col[2];
     const int16_t (*l1mv0)[2], (*l1mv1)[2];
     const int8_t *l1ref0, *l1ref1;
@@ -394,16 +418,21 @@ static void pred_temp_direct_motion(H264Context * const h, int *mb_type){
 
     assert(h->ref_list[1][0].reference&3);
 
+    await_reference_mb_row(h, &h->ref_list[1][0], s->mb_y + !!IS_INTERLACED(*mb_type));
+
     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
+            mb_y = (s->mb_y&~1) + h->col_parity;
             mb_xy= s->mb_x + ((s->mb_y&~1) + h->col_parity)*s->mb_stride;
             b8_stride = 0;
         }else{
-            mb_xy += h->col_fieldoff; // non zero for FL -> FL & differ parity
+            mb_y  += h->col_fieldoff;
+            mb_xy += s->mb_stride*h->col_fieldoff; // non zero for FL -> FL & differ parity
         }
         goto single_col;
     }else{                                               // AFL/AFR/FR/FL -> AFR/FR
         if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
+            mb_y = s->mb_y&~1;
             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
@@ -440,6 +469,8 @@ single_col:
         }
     }
 
+    await_reference_mb_row(h, &h->ref_list[1][0], mb_y);
+
     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
     l1ref0 = &h->ref_list[1][0].ref_index [0][4*mb_xy];
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 88ed5332ae..82c2987b90 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -37,6 +37,7 @@
 #include "bytestream.h"
 #include "vdpau_internal.h"
 #include "xvmc_internal.h"
+#include "thread.h"
 
 //#undef NDEBUG
 //#include <assert.h>
@@ -1179,6 +1180,27 @@ static av_cold int mpeg_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+static int mpeg_decode_update_thread_context(AVCodecContext *avctx, const AVCodecContext *avctx_from)
+{
+    Mpeg1Context *ctx = avctx->priv_data, *ctx_from = avctx_from->priv_data;
+    MpegEncContext *s = &ctx->mpeg_enc_ctx, *s1 = &ctx_from->mpeg_enc_ctx;
+    int err;
+
+    if(avctx == avctx_from || !ctx_from->mpeg_enc_ctx_allocated || !s1->context_initialized)
+        return 0;
+
+    err = ff_mpeg_update_thread_context(avctx, avctx_from);
+    if(err) return err;
+
+    if(!ctx->mpeg_enc_ctx_allocated)
+        memcpy(s + 1, s1 + 1, sizeof(Mpeg1Context) - sizeof(MpegEncContext));
+
+    if(!(s->pict_type == FF_B_TYPE || s->low_delay))
+        s->picture_number++;
+
+    return 0;
+}
+
 static void quant_matrix_rebuild(uint16_t *matrix, const uint8_t *old_perm,
                                      const uint8_t *new_perm){
     uint16_t temp_matrix[64];
@@ -1595,6 +1617,9 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size)
         }
 
         *s->current_picture_ptr->pan_scan= s1->pan_scan;
+
+        if (HAVE_PTHREADS && (avctx->active_thread_type & FF_THREAD_FRAME))
+            ff_thread_finish_setup(avctx);
     }else{ //second field
             int i;
 
@@ -1769,6 +1794,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
             const int mb_size= 16>>s->avctx->lowres;
 
             ff_draw_horiz_band(s, mb_size*(s->mb_y>>field_pic), mb_size);
+            MPV_report_decode_progress(s);
 
             s->mb_x = 0;
             s->mb_y += 1<<field_pic;
@@ -1926,7 +1952,8 @@ static int slice_end(AVCodecContext *avctx, AVFrame *pict)
             *pict= *(AVFrame*)s->current_picture_ptr;
             ff_print_debug_info(s, pict);
         } else {
-            s->picture_number++;
+            if (avctx->active_thread_type & FF_THREAD_FRAME)
+                s->picture_number++;
             /* latency of 1 frame for I- and P-frames */
             /* XXX: use another variable than picture_number */
             if (s->last_picture_ptr != NULL) {
@@ -2262,7 +2289,7 @@ static int decode_chunks(AVCodecContext *avctx,
         buf_ptr = ff_find_start_code(buf_ptr,buf_end, &start_code);
         if (start_code > 0x1ff){
             if(s2->pict_type != AV_PICTURE_TYPE_B || avctx->skip_frame <= AVDISCARD_DEFAULT){
-                if((avctx->active_thread_type & FF_THREAD_SLICE) && avctx->thread_count > 1){
+                if(HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_SLICE)){
                     int i;
 
                     avctx->execute(avctx, slice_decode_thread,  &s2->thread_context[0], NULL, s->slice_count, sizeof(void*));
@@ -2430,7 +2457,7 @@ static int decode_chunks(AVCodecContext *avctx,
                     break;
                 }
 
-                if((avctx->active_thread_type & FF_THREAD_SLICE) && avctx->thread_count > 1){
+                if(HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_SLICE)){
                     int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count;
                     if(threshold <= mb_y){
                         MpegEncContext *thread_context= s2->thread_context[s->slice_count];
@@ -2505,6 +2532,7 @@ AVCodec ff_mpeg1video_decoder = {
     .flush= flush,
     .max_lowres= 3,
     .long_name= NULL_IF_CONFIG_SMALL("MPEG-1 video"),
+    .update_thread_context= ONLY_IF_THREADS_ENABLED(mpeg_decode_update_thread_context)
 };
 
 AVCodec ff_mpeg2video_decoder = {
@@ -2541,7 +2569,7 @@ AVCodec ff_mpegvideo_decoder = {
 
 #if CONFIG_MPEG_XVMC_DECODER
 static av_cold int mpeg_mc_decode_init(AVCodecContext *avctx){
-    if((avctx->active_thread_type & FF_THREAD_SLICE) && avctx->thread_count > 1)
+    if( avctx->active_thread_type & FF_THREAD_SLICE )
         return -1;
     if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) )
         return -1;
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index 66d4127884..81f09c5a4b 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -23,6 +23,7 @@
 #include "mpegvideo.h"
 #include "mpeg4video.h"
 #include "h263.h"
+#include "thread.h"
 
 // The defines below define the number of bits that are read at once for
 // reading vlc values. Changing these may improve speed and data cache needs
@@ -373,7 +374,13 @@ int mpeg4_decode_video_packet_header(MpegEncContext *s)
         return -1;
     }
     if(s->pict_type == AV_PICTURE_TYPE_B){
-        while(s->next_picture.mbskip_table[ s->mb_index2xy[ mb_num ] ]) mb_num++;
+        int mb_x = 0, mb_y = 0;
+
+        while(s->next_picture.mbskip_table[ s->mb_index2xy[ mb_num ] ]) {
+            if (!mb_x) ff_thread_await_progress((AVFrame*)s->next_picture_ptr, mb_y++, 0);
+            mb_num++;
+            if (++mb_x == s->mb_width) mb_x = 0;
+        }
         if(mb_num >= s->mb_num) return -1; // slice contains just skipped MBs which where already decoded
     }
 
@@ -1303,6 +1310,8 @@ static int mpeg4_decode_mb(MpegEncContext *s,
                 s->last_mv[i][1][0]=
                 s->last_mv[i][1][1]= 0;
             }
+
+            ff_thread_await_progress((AVFrame*)s->next_picture_ptr, s->mb_y, 0);
         }
 
         /* if we skipped it in the future P Frame than skip it now too */
@@ -1482,6 +1491,12 @@ end:
     if(s->codec_id==CODEC_ID_MPEG4){
         if(mpeg4_is_resync(s)){
             const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1;
+
+            if(s->pict_type==AV_PICTURE_TYPE_B && s->next_picture.mbskip_table[xy + delta]){
+                ff_thread_await_progress((AVFrame*)s->next_picture_ptr,
+                                        (s->mb_x + delta >= s->mb_width) ? FFMIN(s->mb_y+1, s->mb_height-1) : s->mb_y, 0);
+            }
+
             if(s->pict_type==AV_PICTURE_TYPE_B && s->next_picture.mbskip_table[xy + delta])
                 return SLICE_OK;
             return SLICE_END;
@@ -2235,11 +2250,12 @@ AVCodec ff_mpeg4_decoder = {
     NULL,
     ff_h263_decode_end,
     ff_h263_decode_frame,
-    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY | CODEC_CAP_FRAME_THREADS,
     .flush= ff_mpeg_flush,
     .max_lowres= 3,
     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2"),
     .pix_fmts= ff_hwaccel_pixfmt_list_420,
+    .update_thread_context= ONLY_IF_THREADS_ENABLED(ff_mpeg_update_thread_context)
 };
 
 
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index f24e11910a..dfb2289201 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -38,6 +38,7 @@
 #include "msmpeg4.h"
 #include "faandct.h"
 #include "xvmc_internal.h"
+#include "thread.h"
 #include <limits.h>
 
 //#undef NDEBUG
@@ -205,7 +206,7 @@ void ff_copy_picture(Picture *dst, Picture *src){
  */
 static void free_frame_buffer(MpegEncContext *s, Picture *pic)
 {
-    s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
+    ff_thread_release_buffer(s->avctx, (AVFrame*)pic);
     av_freep(&pic->hwaccel_picture_private);
 }
 
@@ -227,7 +228,7 @@ static int alloc_frame_buffer(MpegEncContext *s, Picture *pic)
         }
     }
 
-    r = s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
+    r = ff_thread_get_buffer(s->avctx, (AVFrame*)pic);
 
     if (r<0 || !pic->age || !pic->type || !pic->data[0]) {
         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
@@ -315,6 +316,7 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){
     s->prev_pict_types[0]= s->dropable ? AV_PICTURE_TYPE_B : s->pict_type;
     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == AV_PICTURE_TYPE_B)
         pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.
+    pic->owner2 = NULL;
 
     return 0;
 fail: //for the FF_ALLOCZ_OR_GOTO macro
@@ -458,6 +460,81 @@ void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
 }
 
+int ff_mpeg_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+    MpegEncContext *s = dst->priv_data, *s1 = src->priv_data;
+
+    if(dst == src || !s1->context_initialized) return 0;
+
+    //FIXME can parameters change on I-frames? in that case dst may need a reinit
+    if(!s->context_initialized){
+        memcpy(s, s1, sizeof(MpegEncContext));
+
+        s->avctx                 = dst;
+        s->picture_range_start  += MAX_PICTURE_COUNT;
+        s->picture_range_end    += MAX_PICTURE_COUNT;
+        s->bitstream_buffer      = NULL;
+        s->bitstream_buffer_size = s->allocated_bitstream_buffer_size = 0;
+
+        MPV_common_init(s);
+    }
+
+    s->avctx->coded_height  = s1->avctx->coded_height;
+    s->avctx->coded_width   = s1->avctx->coded_width;
+    s->avctx->width         = s1->avctx->width;
+    s->avctx->height        = s1->avctx->height;
+
+    s->coded_picture_number = s1->coded_picture_number;
+    s->picture_number       = s1->picture_number;
+    s->input_picture_number = s1->input_picture_number;
+
+    memcpy(s->picture, s1->picture, s1->picture_count * sizeof(Picture));
+    memcpy(&s->last_picture, &s1->last_picture, (char*)&s1->last_picture_ptr - (char*)&s1->last_picture);
+
+    s->last_picture_ptr     = REBASE_PICTURE(s1->last_picture_ptr,    s, s1);
+    s->current_picture_ptr  = REBASE_PICTURE(s1->current_picture_ptr, s, s1);
+    s->next_picture_ptr     = REBASE_PICTURE(s1->next_picture_ptr,    s, s1);
+
+    memcpy(s->prev_pict_types, s1->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
+
+    //Error/bug resilience
+    s->next_p_frame_damaged = s1->next_p_frame_damaged;
+    s->workaround_bugs      = s1->workaround_bugs;
+
+    //MPEG4 timing info
+    memcpy(&s->time_increment_bits, &s1->time_increment_bits, (char*)&s1->shape - (char*)&s1->time_increment_bits);
+
+    //B-frame info
+    s->max_b_frames         = s1->max_b_frames;
+    s->low_delay            = s1->low_delay;
+    s->dropable             = s1->dropable;
+
+    //DivX handling (doesn't work)
+    s->divx_packed          = s1->divx_packed;
+
+    if(s1->bitstream_buffer){
+        if (s1->bitstream_buffer_size + FF_INPUT_BUFFER_PADDING_SIZE > s->allocated_bitstream_buffer_size)
+            av_fast_malloc(&s->bitstream_buffer, &s->allocated_bitstream_buffer_size, s1->allocated_bitstream_buffer_size);
+        s->bitstream_buffer_size  = s1->bitstream_buffer_size;
+        memcpy(s->bitstream_buffer, s1->bitstream_buffer, s1->bitstream_buffer_size);
+        memset(s->bitstream_buffer+s->bitstream_buffer_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    }
+
+    //MPEG2/interlacing info
+    memcpy(&s->progressive_sequence, &s1->progressive_sequence, (char*)&s1->rtp_mode - (char*)&s1->progressive_sequence);
+
+    if(!s1->first_field){
+        s->last_pict_type= s1->pict_type;
+        if (s1->current_picture_ptr) s->last_lambda_for[s1->pict_type] = s1->current_picture_ptr->quality;
+
+        if(s1->pict_type!=FF_B_TYPE){
+            s->last_non_b_pict_type= s1->pict_type;
+        }
+    }
+
+    return 0;
+}
+
 /**
  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
  * the changed fields will not depend upon the prior state of the MpegEncContext.
@@ -478,6 +555,9 @@ void MPV_common_defaults(MpegEncContext *s){
 
     s->f_code = 1;
     s->b_code = 1;
+
+    s->picture_range_start = 0;
+    s->picture_range_end = MAX_PICTURE_COUNT;
 }
 
 /**
@@ -506,7 +586,8 @@ av_cold int MPV_common_init(MpegEncContext *s)
         return -1;
     }
 
-    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
+    if((s->avctx->active_thread_type & FF_THREAD_SLICE) &&
+       (s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height))){
         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
         return -1;
     }
@@ -602,8 +683,9 @@ av_cold int MPV_common_init(MpegEncContext *s)
         }
     }
 
-    FF_ALLOCZ_OR_GOTO(s->avctx, s->picture, MAX_PICTURE_COUNT * sizeof(Picture), fail)
-    for(i = 0; i < MAX_PICTURE_COUNT; i++) {
+    s->picture_count = MAX_PICTURE_COUNT * FFMAX(1, s->avctx->thread_count);
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->picture, s->picture_count * sizeof(Picture), fail)
+    for(i = 0; i < s->picture_count; i++) {
         avcodec_get_frame_defaults((AVFrame *)&s->picture[i]);
     }
 
@@ -665,9 +747,10 @@ av_cold int MPV_common_init(MpegEncContext *s)
     }
 
     s->context_initialized = 1;
+    s->thread_context[0]= s;
 
     if (s->width && s->height) {
-        s->thread_context[0]= s;
+    if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE) {
         threads = s->avctx->thread_count;
 
         for(i=1; i<threads; i++){
@@ -681,6 +764,11 @@ av_cold int MPV_common_init(MpegEncContext *s)
             s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
             s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
         }
+    } else {
+        if(init_duplicate_context(s, s) < 0) goto fail;
+        s->start_mb_y = 0;
+        s->end_mb_y   = s->mb_height;
+    }
     }
 
     return 0;
@@ -694,12 +782,14 @@ void MPV_common_end(MpegEncContext *s)
 {
     int i, j, k;
 
-    for(i=0; i<s->avctx->thread_count; i++){
-        free_duplicate_context(s->thread_context[i]);
-    }
-    for(i=1; i<s->avctx->thread_count; i++){
-        av_freep(&s->thread_context[i]);
-    }
+    if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE) {
+        for(i=0; i<s->avctx->thread_count; i++){
+            free_duplicate_context(s->thread_context[i]);
+        }
+        for(i=1; i<s->avctx->thread_count; i++){
+            av_freep(&s->thread_context[i]);
+        }
+    } else free_duplicate_context(s);
 
     av_freep(&s->parse_context.buffer);
     s->parse_context.buffer_size=0;
@@ -754,8 +844,8 @@ void MPV_common_end(MpegEncContext *s)
     av_freep(&s->reordered_input_picture);
     av_freep(&s->dct_offset);
 
-    if(s->picture){
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
+    if(s->picture && !s->avctx->is_copy){
+        for(i=0; i<s->picture_count; i++){
             free_picture(s, &s->picture[i]);
         }
     }
@@ -769,7 +859,8 @@ void MPV_common_end(MpegEncContext *s)
     for(i=0; i<3; i++)
         av_freep(&s->visualization_buffer[i]);
 
-    avcodec_default_free_buffers(s->avctx);
+    if(!(s->avctx->active_thread_type&FF_THREAD_FRAME))
+        avcodec_default_free_buffers(s->avctx);
 }
 
 void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
@@ -863,18 +954,33 @@ void init_vlc_rl(RLTable *rl)
     }
 }
 
+void ff_release_unused_pictures(MpegEncContext *s, int remove_current)
+{
+    int i;
+
+    /* release non reference frames */
+    for(i=0; i<s->picture_count; i++){
+        if(s->picture[i].data[0] && !s->picture[i].reference
+           && (!s->picture[i].owner2 || s->picture[i].owner2 == s)
+           && (remove_current || &s->picture[i] != s->current_picture_ptr)
+           /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
+            free_frame_buffer(s, &s->picture[i]);
+        }
+    }
+}
+
 int ff_find_unused_picture(MpegEncContext *s, int shared){
     int i;
 
     if(shared){
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
+        for(i=s->picture_range_start; i<s->picture_range_end; i++){
             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
         }
     }else{
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
+        for(i=s->picture_range_start; i<s->picture_range_end; i++){
             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
         }
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
+        for(i=s->picture_range_start; i<s->picture_range_end; i++){
             if(s->picture[i].data[0]==NULL) return i;
         }
     }
@@ -931,7 +1037,7 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
         /* release forgotten pictures */
         /* if(mpeg124/h263) */
         if(!s->encoding){
-            for(i=0; i<MAX_PICTURE_COUNT; i++){
+            for(i=0; i<s->picture_count; i++){
                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                     free_frame_buffer(s, &s->picture[i]);
@@ -942,12 +1048,7 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
     }
 
     if(!s->encoding){
-        /* release non reference frames */
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
-            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
-                free_frame_buffer(s, &s->picture[i]);
-            }
-        }
+        ff_release_unused_pictures(s, 1);
 
         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
             pic= s->current_picture_ptr; //we already have a unused image (maybe it was set before reading the header)
@@ -977,6 +1078,7 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
                 s->current_picture_ptr->top_field_first= (s->picture_structure == PICT_TOP_FIELD) == s->first_field;
         }
         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
+        s->current_picture_ptr->field_picture= s->picture_structure != PICT_FRAME;
     }
 
     s->current_picture_ptr->pict_type= s->pict_type;
@@ -1010,6 +1112,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
             s->last_picture_ptr= &s->picture[i];
             if(ff_alloc_picture(s, s->last_picture_ptr, 0) < 0)
                 return -1;
+            ff_thread_report_progress((AVFrame*)s->last_picture_ptr, INT_MAX, 0);
+            ff_thread_report_progress((AVFrame*)s->last_picture_ptr, INT_MAX, 1);
         }
         if((s->next_picture_ptr==NULL || s->next_picture_ptr->data[0]==NULL) && s->pict_type==AV_PICTURE_TYPE_B){
             /* Allocate a dummy frame */
@@ -1017,6 +1121,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
             s->next_picture_ptr= &s->picture[i];
             if(ff_alloc_picture(s, s->next_picture_ptr, 0) < 0)
                 return -1;
+            ff_thread_report_progress((AVFrame*)s->next_picture_ptr, INT_MAX, 0);
+            ff_thread_report_progress((AVFrame*)s->next_picture_ptr, INT_MAX, 1);
         }
     }
 
@@ -1068,11 +1174,12 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
 void MPV_frame_end(MpegEncContext *s)
 {
     int i;
-    /* draw edge for correct motion prediction if outside */
+    /* redraw edges for the frame if decoding didn't complete */
     //just to make sure that all data is rendered.
     if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
         ff_xvmc_field_end(s);
-    }else if(!s->avctx->hwaccel
+   }else if((s->error_count || s->encoding)
+       && !s->avctx->hwaccel
        && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        && s->unrestricted_mv
        && s->current_picture.reference
@@ -1088,6 +1195,7 @@ void MPV_frame_end(MpegEncContext *s)
                               s->h_edge_pos>>1, s->v_edge_pos>>1,
                               EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
     }
+
     emms_c();
 
     s->last_pict_type    = s->pict_type;
@@ -1108,7 +1216,7 @@ void MPV_frame_end(MpegEncContext *s)
 
     if(s->encoding){
         /* release non-reference frames */
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
+        for(i=0; i<s->picture_count; i++){
             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                 free_frame_buffer(s, &s->picture[i]);
             }
@@ -1121,6 +1229,10 @@ void MPV_frame_end(MpegEncContext *s)
     memset(&s->current_picture, 0, sizeof(Picture));
 #endif
     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
+
+    if (s->codec_id != CODEC_ID_H264 && s->current_picture.reference) {
+        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, s->mb_height-1, 0);
+    }
 }
 
 /**
@@ -1785,6 +1897,43 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
     }
 }
 
+/**
+ * Find the lowest MB row referenced by the current MB's motion vectors.
+ * The result is the progress value to await on the reference picture.
+ * @param s   context; reads mv, mv_type, quarter_sample, mb_y, mb_height
+ * @param dir index into s->mv[] (callers pass 0 forward, 1 backward)
+ * @return row clipped to [0, mb_height-1]; mb_height-1 for unhandled cases
+ */
+int MPV_lowest_referenced_row(MpegEncContext *s, int dir)
+{
+    int my_max = INT_MIN, my_min = INT_MAX, qpel_shift = !s->quarter_sample;
+    int my, off, i, mvs;
+
+    /* field pictures are not analysed: report the last row instead */
+    if (s->picture_structure != PICT_FRAME) goto unhandled;
+
+    switch (s->mv_type) {
+        case MV_TYPE_16X16: mvs = 1; break;
+        case MV_TYPE_16X8:  mvs = 2; break;
+        case MV_TYPE_8X8:   mvs = 4; break;
+        default:            goto unhandled;
+    }
+
+    for (i = 0; i < mvs; i++) {
+        my = s->mv[dir][i][1];
+        my_max = FFMAX(my_max, my);
+        my_min = FFMIN(my_min, my);
+    }
+
+    /* Scale the (non-negative) magnitude rather than each signed MV:
+     * left-shifting a negative int is undefined behaviour in C. */
+    off = ((FFMAX(-my_min, my_max) << qpel_shift) + 63) >> 6;
+
+    return FFMIN(FFMAX(s->mb_y + off, 0), s->mb_height-1);
+unhandled:
+    return s->mb_height-1;
+}
+
 /* put block[] to dest[] */
 static inline void put_dct(MpegEncContext *s,
                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
@@ -1949,6 +2098,16 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
             /* motion handling */
             /* decoding or more than one mb_type (MC was already done otherwise) */
             if(!s->encoding){
+
+                if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) {
+                    if (s->mv_dir & MV_DIR_FORWARD) {
+                        ff_thread_await_progress((AVFrame*)s->last_picture_ptr, MPV_lowest_referenced_row(s, 0), 0);
+                    }
+                    if (s->mv_dir & MV_DIR_BACKWARD) {
+                        ff_thread_await_progress((AVFrame*)s->next_picture_ptr, MPV_lowest_referenced_row(s, 1), 0);
+                    }
+                }
+
                 if(lowres_flag){
                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
 
@@ -2112,19 +2271,37 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
  * @param h is the normal height, this will be reduced automatically if needed for the last row
  */
 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
+    const int field_pic= s->picture_structure != PICT_FRAME;
+    if(field_pic){
+        h <<= 1;
+        y <<= 1;
+    }
+
+    if (!s->avctx->hwaccel
+       && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+       && s->unrestricted_mv
+       && s->current_picture.reference
+       && !s->intra_only
+       && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
+        int sides = 0, edge_h;
+        if (y==0) sides |= EDGE_TOP;
+        if (y + h >= s->v_edge_pos) sides |= EDGE_BOTTOM;
+
+        edge_h= FFMIN(h, s->v_edge_pos - y);
+
+        s->dsp.draw_edges(s->current_picture_ptr->data[0] +  y    *s->linesize  , s->linesize  , s->h_edge_pos   , edge_h   , EDGE_WIDTH  , sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides);
+    }
+
+    h= FFMIN(h, s->avctx->height - y);
+
+    if(field_pic && s->first_field && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
+
     if (s->avctx->draw_horiz_band) {
         AVFrame *src;
-        const int field_pic= s->picture_structure != PICT_FRAME;
         int offset[4];
 
-        h= FFMIN(h, (s->avctx->height>>field_pic) - y);
-
-        if(field_pic && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)){
-            h <<= 1;
-            y <<= 1;
-            if(s->first_field) return;
-        }
-
         if(s->pict_type==AV_PICTURE_TYPE_B || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
             src= (AVFrame*)s->current_picture_ptr;
         else if(s->last_picture_ptr)
@@ -2190,7 +2367,7 @@ void ff_mpeg_flush(AVCodecContext *avctx){
     if(s==NULL || s->picture==NULL)
         return;
 
-    for(i=0; i<MAX_PICTURE_COUNT; i++){
+    for(i=0; i<s->picture_count; i++){
        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
         free_frame_buffer(s, &s->picture[i]);
@@ -2444,3 +2621,9 @@ void ff_set_qscale(MpegEncContext * s, int qscale)
     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
 }
+
+void MPV_report_decode_progress(MpegEncContext *s)
+{   /* report row s->mb_y of the current picture; skipped for B, partitioned or damaged frames */
+    if (s->pict_type != AV_PICTURE_TYPE_B && !s->partitioned_frame && !s->error_occurred)
+        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, s->mb_y, 0);
+}
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 6d5ab19283..6ce7faa235 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -76,6 +76,8 @@ enum OutputFormat {
 #define EXT_START_CODE          0x000001b5
 #define USER_START_CODE         0x000001b2
 
+struct MpegEncContext;
+
 /**
  * Picture.
  */
@@ -123,6 +125,7 @@ typedef struct Picture{
     int ref_poc[2][2][16];      ///< h264 POCs of the frames used as reference (FIXME need per slice)
     int ref_count[2][2];        ///< number of entries in ref_poc              (FIXME need per slice)
     int mbaff;                  ///< h264 1 -> MBAFF frame 0-> not MBAFF
+    int field_picture;          ///< whether or not the picture was encoded in separate fields
 
     int mb_var_sum;             ///< sum of MB variance for current frame
     int mc_mb_var_sum;          ///< motion compensated MB variance for current frame
@@ -131,10 +134,9 @@ typedef struct Picture{
     uint8_t *mb_mean;           ///< Table for MB luminance
     int32_t *mb_cmp_score;      ///< Table for MB cmp scores, for mb decision FIXME remove
     int b_frame_score;          /* */
+    struct MpegEncContext *owner2; ///< pointer to the MpegEncContext that allocated this picture
 } Picture;
 
-struct MpegEncContext;
-
 /**
  * Motion estimation context.
  */
@@ -291,6 +293,8 @@ typedef struct MpegEncContext {
     Picture *last_picture_ptr;     ///< pointer to the previous picture.
     Picture *next_picture_ptr;     ///< pointer to the next picture (for bidir pred)
     Picture *current_picture_ptr;  ///< pointer to the current picture
+    int picture_count;             ///< number of allocated pictures (MAX_PICTURE_COUNT * avctx->thread_count)
+    int picture_range_start, picture_range_end; ///< the part of picture that this context can allocate in
     uint8_t *visualization_buffer[3]; //< temporary buffer vor MV visualization
     int last_dc[3];                ///< last DC values for MPEG1
     int16_t *dc_val_base;
@@ -470,7 +474,7 @@ typedef struct MpegEncContext {
     int last_bits; ///< temp var used for calculating the above vars
 
     /* error concealment / resync */
-    int error_count;
+    int error_count, error_occurred;
     uint8_t *error_status_table;       ///< table of the error status of each MB
 #define VP_START            1          ///< current MB is the first after a resync marker
 #define AC_ERROR            2
@@ -677,6 +681,10 @@ typedef struct MpegEncContext {
     void (*denoise_dct)(struct MpegEncContext *s, DCTELEM *block);
 } MpegEncContext;
 
+#define REBASE_PICTURE(pic, new_ctx, old_ctx) ((pic) ? \
+    ((pic) >= (old_ctx)->picture && (pic) < (old_ctx)->picture+(old_ctx)->picture_count ?\
+        &(new_ctx)->picture[(pic) - (old_ctx)->picture] : (Picture*)((uint8_t*)(pic) - (uint8_t*)(old_ctx) + (uint8_t*)(new_ctx)))\
+    : NULL) /* map pic to the same picture[] slot, or same byte offset, in new_ctx */
 
 void MPV_decode_defaults(MpegEncContext *s);
 int MPV_common_init(MpegEncContext *s);
@@ -699,9 +707,13 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h);
 void ff_mpeg_flush(AVCodecContext *avctx);
 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict);
 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix);
+void ff_release_unused_pictures(MpegEncContext *s, int remove_current);
 int ff_find_unused_picture(MpegEncContext *s, int shared);
 void ff_denoise_dct(MpegEncContext *s, DCTELEM *block);
 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src);
+int MPV_lowest_referenced_row(MpegEncContext *s, int dir);
+void MPV_report_decode_progress(MpegEncContext *s);
+int ff_mpeg_update_thread_context(AVCodecContext *dst, const AVCodecContext *src);
 const uint8_t *ff_find_start_code(const uint8_t *p, const uint8_t *end, uint32_t *state);
 void ff_set_qscale(MpegEncContext * s, int qscale);
 
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index a212149189..237ea64790 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -36,6 +36,7 @@
 #include "mjpegenc.h"
 #include "msmpeg4.h"
 #include "faandct.h"
+#include "thread.h"
 #include "aandcttab.h"
 #include "flv.h"
 #include "mpeg4video.h"
@@ -1225,9 +1226,9 @@ int MPV_encode_picture(AVCodecContext *avctx,
 {
     MpegEncContext *s = avctx->priv_data;
     AVFrame *pic_arg = data;
-    int i, stuffing_count;
+    int i, stuffing_count, context_count = avctx->active_thread_type&FF_THREAD_SLICE ? avctx->thread_count : 1;
 
-    for(i=0; i<avctx->thread_count; i++){
+    for(i=0; i<context_count; i++){
         int start_y= s->thread_context[i]->start_mb_y;
         int   end_y= s->thread_context[i]->  end_mb_y;
         int h= s->mb_height;
@@ -1291,7 +1292,7 @@ vbv_retry:
                     s->last_non_b_time= s->time - s->pp_time;
                 }
 //                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
-                for(i=0; i<avctx->thread_count; i++){
+                for(i=0; i<context_count; i++){
                     PutBitContext *pb= &s->thread_context[i]->pb;
                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
                 }
@@ -2758,6 +2759,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
 {
     int i;
     int bits;
+    int context_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;
 
     s->picture_number = picture_number;
 
@@ -2797,7 +2799,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
     }
 
     s->mb_intra=0; //for the rate distortion & bit compare functions
-    for(i=1; i<s->avctx->thread_count; i++){
+    for(i=1; i<context_count; i++){
         ff_update_duplicate_context(s->thread_context[i], s);
     }
 
@@ -2810,11 +2812,11 @@ static int encode_picture(MpegEncContext *s, int picture_number)
         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
-                s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, s->avctx->thread_count, sizeof(void*));
+                s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
             }
         }
 
-        s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, s->avctx->thread_count, sizeof(void*));
+        s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
         /* I-Frame */
         for(i=0; i<s->mb_stride*s->mb_height; i++)
@@ -2822,10 +2824,10 @@ static int encode_picture(MpegEncContext *s, int picture_number)
 
         if(!s->fixed_qscale){
             /* finding spatial complexity for I-frame rate control */
-            s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, s->avctx->thread_count, sizeof(void*));
+            s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
         }
     }
-    for(i=1; i<s->avctx->thread_count; i++){
+    for(i=1; i<context_count; i++){
         merge_context_after_me(s, s->thread_context[i]);
     }
     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
@@ -2961,11 +2963,11 @@ static int encode_picture(MpegEncContext *s, int picture_number)
     bits= put_bits_count(&s->pb);
     s->header_bits= bits - s->last_bits;
 
-    for(i=1; i<s->avctx->thread_count; i++){
+    for(i=1; i<context_count; i++){
         update_duplicate_context_after_me(s->thread_context[i], s);
     }
-    s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, s->avctx->thread_count, sizeof(void*));
-    for(i=1; i<s->avctx->thread_count; i++){
+    s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
+    for(i=1; i<context_count; i++){
         merge_context_after_encode(s, s->thread_context[i]);
     }
     emms_c();
diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c
index 0de876087b..08ef4ba0c2 100644
--- a/libavcodec/pthread.c
+++ b/libavcodec/pthread.c
@@ -55,7 +55,7 @@ typedef struct ThreadContext {
 } ThreadContext;
 
 /// Max number of frame buffers that can be allocated when using frame threads.
-#define MAX_BUFFERS 32
+#define MAX_BUFFERS (32+1)
 
 /**
  * Context used by codec threads and stored in their AVCodecContext thread_opaque.
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 9e879940a9..0eb5afd63c 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -115,7 +115,7 @@ typedef struct InternalBuffer{
     enum PixelFormat pix_fmt;
 }InternalBuffer;
 
-#define INTERNAL_BUFFER_SIZE 32
+#define INTERNAL_BUFFER_SIZE (32+1)
 
 void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int linesize_align[4]){
     int w_align= 1;
@@ -360,6 +360,7 @@ void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){
     assert(pic->type==FF_BUFFER_TYPE_INTERNAL);
     assert(s->internal_buffer_count);
 
+    if(s->internal_buffer){
     buf = NULL; /* avoids warning */
     for(i=0; i<s->internal_buffer_count; i++){ //just 3-5 checks so is not worth to optimize
         buf= &((InternalBuffer*)s->internal_buffer)[i];
@@ -371,6 +372,7 @@ void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){
     last = &((InternalBuffer*)s->internal_buffer)[s->internal_buffer_count];
 
     FFSWAP(InternalBuffer, *buf, *last);
+    }
 
     for(i=0; i<4; i++){
         pic->data[i]=NULL;
diff --git a/tests/fate/h264.mak b/tests/fate/h264.mak
index 5e6ff50e84..969bf413e4 100644
--- a/tests/fate/h264.mak
+++ b/tests/fate/h264.mak
@@ -179,23 +179,23 @@ FATE_H264  := $(FATE_H264:%=fate-h264-conformance-%)                    \
 FATE_TESTS += $(FATE_H264)
 fate-h264: $(FATE_H264)
 
-fate-h264-conformance-aud_mw_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/AUD_MW_E.264
-fate-h264-conformance-ba1_ft_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/BA1_FT_C.264
-fate-h264-conformance-ba1_sony_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/BA1_Sony_D.jsv
-fate-h264-conformance-ba2_sony_f: CMD = framecrc  -i $(SAMPLES)/h264-conformance/BA2_Sony_F.jsv
+fate-h264-conformance-aud_mw_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/AUD_MW_E.264
+fate-h264-conformance-ba1_ft_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BA1_FT_C.264
+fate-h264-conformance-ba1_sony_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BA1_Sony_D.jsv
+fate-h264-conformance-ba2_sony_f: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BA2_Sony_F.jsv
 fate-h264-conformance-ba3_sva_c: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/BA3_SVA_C.264
-fate-h264-conformance-ba_mw_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/BA_MW_D.264
-fate-h264-conformance-bamq1_jvc_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/BAMQ1_JVC_C.264
-fate-h264-conformance-bamq2_jvc_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/BAMQ2_JVC_C.264
-fate-h264-conformance-banm_mw_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/BANM_MW_D.264
-fate-h264-conformance-basqp1_sony_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/BASQP1_Sony_C.jsv
-fate-h264-conformance-caba1_sony_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CABA1_Sony_D.jsv
-fate-h264-conformance-caba1_sva_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CABA1_SVA_B.264
-fate-h264-conformance-caba2_sony_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CABA2_Sony_E.jsv
-fate-h264-conformance-caba2_sva_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CABA2_SVA_B.264
+fate-h264-conformance-ba_mw_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BA_MW_D.264
+fate-h264-conformance-bamq1_jvc_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BAMQ1_JVC_C.264
+fate-h264-conformance-bamq2_jvc_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BAMQ2_JVC_C.264
+fate-h264-conformance-banm_mw_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BANM_MW_D.264
+fate-h264-conformance-basqp1_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BASQP1_Sony_C.jsv
+fate-h264-conformance-caba1_sony_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CABA1_Sony_D.jsv
+fate-h264-conformance-caba1_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CABA1_SVA_B.264
+fate-h264-conformance-caba2_sony_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CABA2_Sony_E.jsv
+fate-h264-conformance-caba2_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CABA2_SVA_B.264
 fate-h264-conformance-caba3_sony_c: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CABA3_Sony_C.jsv
 fate-h264-conformance-caba3_sva_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CABA3_SVA_B.264
-fate-h264-conformance-caba3_toshiba_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CABA3_TOSHIBA_E.264
+fate-h264-conformance-caba3_toshiba_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CABA3_TOSHIBA_E.264
 fate-h264-conformance-cabac_mot_fld0_full: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/camp_mot_fld0_full.26l
 fate-h264-conformance-cabac_mot_frm0_full: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/camp_mot_frm0_full.26l
 fate-h264-conformance-cabac_mot_mbaff0_full: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/camp_mot_mbaff0_full.26l
@@ -206,7 +206,7 @@ fate-h264-conformance-cabastbr3_sony_b: CMD = framecrc  -vsync 0 -strict 1 -i $(
 fate-h264-conformance-cabref3_sand_d: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CABREF3_Sand_D.264
 fate-h264-conformance-cacqp3_sony_d: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CACQP3_Sony_D.jsv
 fate-h264-conformance-cafi1_sva_c: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAFI1_SVA_C.264
-fate-h264-conformance-cama1_sony_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CAMA1_Sony_C.jsv
+fate-h264-conformance-cama1_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CAMA1_Sony_C.jsv
 fate-h264-conformance-cama1_toshiba_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAMA1_TOSHIBA_B.264
 fate-h264-conformance-cama1_vtc_c: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/cama1_vtc_c.avc
 fate-h264-conformance-cama2_vtc_b: CMD = framecrc  -vsync 0 -i $(SAMPLES)/h264-conformance/cama2_vtc_b.avc
@@ -219,37 +219,37 @@ fate-h264-conformance-camanl3_sand_e: CMD = framecrc  -vsync 0 -strict 1 -i $(SA
 fate-h264-conformance-camasl3_sony_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAMASL3_Sony_B.jsv
 fate-h264-conformance-camp_mot_mbaff_l30: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAMP_MOT_MBAFF_L30.26l
 fate-h264-conformance-camp_mot_mbaff_l31: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAMP_MOT_MBAFF_L31.26l
-fate-h264-conformance-canl1_sony_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CANL1_Sony_E.jsv
-fate-h264-conformance-canl1_sva_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CANL1_SVA_B.264
-fate-h264-conformance-canl1_toshiba_g: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CANL1_TOSHIBA_G.264
-fate-h264-conformance-canl2_sony_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CANL2_Sony_E.jsv
-fate-h264-conformance-canl2_sva_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CANL2_SVA_B.264
+fate-h264-conformance-canl1_sony_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL1_Sony_E.jsv
+fate-h264-conformance-canl1_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL1_SVA_B.264
+fate-h264-conformance-canl1_toshiba_g: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL1_TOSHIBA_G.264
+fate-h264-conformance-canl2_sony_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL2_Sony_E.jsv
+fate-h264-conformance-canl2_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL2_SVA_B.264
 fate-h264-conformance-canl3_sony_c: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CANL3_Sony_C.jsv
-fate-h264-conformance-canl3_sva_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CANL3_SVA_B.264
-fate-h264-conformance-canl4_sva_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CANL4_SVA_B.264
-fate-h264-conformance-canlma2_sony_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CANLMA2_Sony_C.jsv
-fate-h264-conformance-canlma3_sony_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CANLMA3_Sony_C.jsv
+fate-h264-conformance-canl3_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL3_SVA_B.264
+fate-h264-conformance-canl4_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL4_SVA_B.264
+fate-h264-conformance-canlma2_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANLMA2_Sony_C.jsv
+fate-h264-conformance-canlma3_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANLMA3_Sony_C.jsv
 fate-h264-conformance-capa1_toshiba_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAPA1_TOSHIBA_B.264
 fate-h264-conformance-capama3_sand_f: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAPAMA3_Sand_F.264
-fate-h264-conformance-capcm1_sand_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CAPCM1_Sand_E.264
-fate-h264-conformance-capcmnl1_sand_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CAPCMNL1_Sand_E.264
+fate-h264-conformance-capcm1_sand_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CAPCM1_Sand_E.264
+fate-h264-conformance-capcmnl1_sand_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CAPCMNL1_Sand_E.264
 fate-h264-conformance-capm3_sony_d: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAPM3_Sony_D.jsv
-fate-h264-conformance-caqp1_sony_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CAQP1_Sony_B.jsv
+fate-h264-conformance-caqp1_sony_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CAQP1_Sony_B.jsv
 fate-h264-conformance-cavlc_mot_fld0_full_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/cvmp_mot_fld0_full_B.26l
 fate-h264-conformance-cavlc_mot_frm0_full_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/cvmp_mot_frm0_full_B.26l
 fate-h264-conformance-cavlc_mot_mbaff0_full_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/cvmp_mot_mbaff0_full_B.26l
 fate-h264-conformance-cavlc_mot_picaff0_full_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/cvmp_mot_picaff0_full_B.26l
-fate-h264-conformance-cawp1_toshiba_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CAWP1_TOSHIBA_E.264
+fate-h264-conformance-cawp1_toshiba_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CAWP1_TOSHIBA_E.264
 fate-h264-conformance-cawp5_toshiba_e: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAWP5_TOSHIBA_E.264
-fate-h264-conformance-ci1_ft_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CI1_FT_B.264
-fate-h264-conformance-ci_mw_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CI_MW_D.264
+fate-h264-conformance-ci1_ft_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CI1_FT_B.264
+fate-h264-conformance-ci_mw_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CI_MW_D.264
 fate-h264-conformance-cvbs3_sony_c: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVBS3_Sony_C.jsv
-fate-h264-conformance-cvcanlma2_sony_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CVCANLMA2_Sony_C.jsv
+fate-h264-conformance-cvcanlma2_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVCANLMA2_Sony_C.jsv
 fate-h264-conformance-cvfi1_sony_d: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVFI1_Sony_D.jsv
 fate-h264-conformance-cvfi1_sva_c: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVFI1_SVA_C.264
 fate-h264-conformance-cvfi2_sony_h: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVFI2_Sony_H.jsv
 fate-h264-conformance-cvfi2_sva_c: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVFI2_SVA_C.264
-fate-h264-conformance-cvma1_sony_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CVMA1_Sony_D.jsv
+fate-h264-conformance-cvma1_sony_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVMA1_Sony_D.jsv
 fate-h264-conformance-cvma1_toshiba_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVMA1_TOSHIBA_B.264
 fate-h264-conformance-cvmanl1_toshiba_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVMANL1_TOSHIBA_B.264
 fate-h264-conformance-cvmanl2_toshiba_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVMANL2_TOSHIBA_B.264
@@ -261,68 +261,68 @@ fate-h264-conformance-cvmp_mot_frm_l31_b: CMD = framecrc  -vsync 0 -strict 1 -i
 fate-h264-conformance-cvnlfi1_sony_c: CMD = framecrc  -vsync 0 -i $(SAMPLES)/h264-conformance/CVNLFI1_Sony_C.jsv
 fate-h264-conformance-cvnlfi2_sony_h: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVNLFI2_Sony_H.jsv
 fate-h264-conformance-cvpa1_toshiba_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVPA1_TOSHIBA_B.264
-fate-h264-conformance-cvpcmnl1_sva_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CVPCMNL1_SVA_C.264
-fate-h264-conformance-cvpcmnl2_sva_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CVPCMNL2_SVA_C.264
-fate-h264-conformance-cvwp1_toshiba_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/CVWP1_TOSHIBA_E.264
+fate-h264-conformance-cvpcmnl1_sva_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVPCMNL1_SVA_C.264
+fate-h264-conformance-cvpcmnl2_sva_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVPCMNL2_SVA_C.264
+fate-h264-conformance-cvwp1_toshiba_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVWP1_TOSHIBA_E.264
 fate-h264-conformance-cvwp2_toshiba_e: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVWP2_TOSHIBA_E.264
 fate-h264-conformance-cvwp3_toshiba_e: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVWP3_TOSHIBA_E.264
 fate-h264-conformance-cvwp5_toshiba_e: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVWP5_TOSHIBA_E.264
 fate-h264-conformance-fi1_sony_e: CMD = framecrc  -vsync 0 -i $(SAMPLES)/h264-conformance/FI1_Sony_E.jsv
-fate-h264-conformance-frext-alphaconformanceg: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/test8b43.264
-fate-h264-conformance-frext-bcrm_freh10: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/freh10.264 -vsync 0
-fate-h264-conformance-frext-brcm_freh11: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/freh11.264 -vsync 0
-fate-h264-conformance-frext-brcm_freh3: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/freh3.264
-fate-h264-conformance-frext-brcm_freh4: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/freh4.264 -vsync 0
-fate-h264-conformance-frext-brcm_freh5: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/freh5.264
-fate-h264-conformance-frext-brcm_freh8: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/freh8.264
-fate-h264-conformance-frext-brcm_freh9: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/freh9.264
-fate-h264-conformance-frext-freh12_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/Freh12_B.264
-fate-h264-conformance-frext-freh1_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/Freh1_B.264
-fate-h264-conformance-frext-freh2_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/Freh2_B.264
-fate-h264-conformance-frext-freh6: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/freh6.264 -vsync 0
-fate-h264-conformance-frext-freh7_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/Freh7_B.264 -vsync 0
-fate-h264-conformance-frext-frext01_jvc_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/FREXT01_JVC_D.264
-fate-h264-conformance-frext-frext02_jvc_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/FREXT02_JVC_C.264
-fate-h264-conformance-frext-frext1_panasonic_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/FRExt1_Panasonic.avc
-fate-h264-conformance-frext-frext2_panasonic_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/FRExt2_Panasonic.avc -vsync 0
-fate-h264-conformance-frext-frext3_panasonic_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/FRExt3_Panasonic.avc
-fate-h264-conformance-frext-frext4_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/FRExt4_Panasonic.avc
+fate-h264-conformance-frext-alphaconformanceg: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/test8b43.264
+fate-h264-conformance-frext-bcrm_freh10: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh10.264 -vsync 0
+fate-h264-conformance-frext-brcm_freh11: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh11.264 -vsync 0
+fate-h264-conformance-frext-brcm_freh3: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh3.264
+fate-h264-conformance-frext-brcm_freh4: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh4.264 -vsync 0
+fate-h264-conformance-frext-brcm_freh5: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh5.264
+fate-h264-conformance-frext-brcm_freh8: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh8.264
+fate-h264-conformance-frext-brcm_freh9: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh9.264
+fate-h264-conformance-frext-freh12_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/Freh12_B.264
+fate-h264-conformance-frext-freh1_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/Freh1_B.264
+fate-h264-conformance-frext-freh2_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/Freh2_B.264
+fate-h264-conformance-frext-freh6: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh6.264 -vsync 0
+fate-h264-conformance-frext-freh7_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/Freh7_B.264 -vsync 0
+fate-h264-conformance-frext-frext01_jvc_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FREXT01_JVC_D.264
+fate-h264-conformance-frext-frext02_jvc_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FREXT02_JVC_C.264
+fate-h264-conformance-frext-frext1_panasonic_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FRExt1_Panasonic.avc
+fate-h264-conformance-frext-frext2_panasonic_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FRExt2_Panasonic.avc -vsync 0
+fate-h264-conformance-frext-frext3_panasonic_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FRExt3_Panasonic.avc
+fate-h264-conformance-frext-frext4_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FRExt4_Panasonic.avc
 fate-h264-conformance-frext-frext_mmco4_sony_b: CMD = framecrc  -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FRExt_MMCO4_Sony_B.264
-fate-h264-conformance-frext-hcaff1_hhi_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HCAFF1_HHI.264
-fate-h264-conformance-frext-hcafr1_hhi_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HCAFR1_HHI.264
-fate-h264-conformance-frext-hcafr2_hhi_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HCAFR2_HHI.264
-fate-h264-conformance-frext-hcafr3_hhi_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HCAFR3_HHI.264
-fate-h264-conformance-frext-hcafr4_hhi_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HCAFR4_HHI.264
-fate-h264-conformance-frext-hcamff1_hhi_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HCAMFF1_HHI.264
-fate-h264-conformance-frext-hpca_brcm_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCA_BRCM_C.264
-fate-h264-conformance-frext-hpcadq_brcm_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCADQ_BRCM_B.264
-fate-h264-conformance-frext-hpcafl_bcrm_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCAFL_BRCM_C.264 -vsync 0
-fate-h264-conformance-frext-hpcaflnl_bcrm_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCAFLNL_BRCM_C.264 -vsync 0
-fate-h264-conformance-frext-hpcalq_brcm_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCALQ_BRCM_B.264
-fate-h264-conformance-frext-hpcamapalq_bcrm_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCAMAPALQ_BRCM_B.264 -vsync 0
-fate-h264-conformance-frext-hpcamolq_brcm_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCAMOLQ_BRCM_B.264
-fate-h264-conformance-frext-hpcanl_brcm_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCANL_BRCM_C.264
-fate-h264-conformance-frext-hpcaq2lq_brcm_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCAQ2LQ_BRCM_B.264
-fate-h264-conformance-frext-hpcv_brcm_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCV_BRCM_A.264
-fate-h264-conformance-frext-hpcvfl_bcrm_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCVFL_BRCM_A.264 -vsync 0
-fate-h264-conformance-frext-hpcvflnl_bcrm_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCVFLNL_BRCM_A.264 -vsync 0
-fate-h264-conformance-frext-hpcvmolq_brcm_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCVMOLQ_BRCM_B.264
-fate-h264-conformance-frext-hpcvnl_brcm_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/HPCVNL_BRCM_A.264
-fate-h264-conformance-frext-pph10i1_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I1_Panasonic_A.264 -pix_fmt yuv420p10le
-fate-h264-conformance-frext-pph10i2_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I2_Panasonic_A.264 -pix_fmt yuv420p10le
-fate-h264-conformance-frext-pph10i3_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I3_Panasonic_A.264 -pix_fmt yuv420p10le
-fate-h264-conformance-frext-pph10i4_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I4_Panasonic_A.264 -pix_fmt yuv420p10le
-fate-h264-conformance-frext-pph10i5_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I5_Panasonic_A.264 -pix_fmt yuv420p10le
-fate-h264-conformance-frext-pph10i6_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I6_Panasonic_A.264 -pix_fmt yuv420p10le
-fate-h264-conformance-frext-pph10i7_panasonic_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/FRext/PPH10I7_Panasonic_A.264 -pix_fmt yuv420p10le
+fate-h264-conformance-frext-hcaff1_hhi_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAFF1_HHI.264
+fate-h264-conformance-frext-hcafr1_hhi_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAFR1_HHI.264
+fate-h264-conformance-frext-hcafr2_hhi_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAFR2_HHI.264
+fate-h264-conformance-frext-hcafr3_hhi_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAFR3_HHI.264
+fate-h264-conformance-frext-hcafr4_hhi_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAFR4_HHI.264
+fate-h264-conformance-frext-hcamff1_hhi_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAMFF1_HHI.264
+fate-h264-conformance-frext-hpca_brcm_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCA_BRCM_C.264
+fate-h264-conformance-frext-hpcadq_brcm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCADQ_BRCM_B.264
+fate-h264-conformance-frext-hpcafl_bcrm_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCAFL_BRCM_C.264 -vsync 0
+fate-h264-conformance-frext-hpcaflnl_bcrm_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCAFLNL_BRCM_C.264 -vsync 0
+fate-h264-conformance-frext-hpcalq_brcm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCALQ_BRCM_B.264
+fate-h264-conformance-frext-hpcamapalq_bcrm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCAMAPALQ_BRCM_B.264 -vsync 0
+fate-h264-conformance-frext-hpcamolq_brcm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCAMOLQ_BRCM_B.264
+fate-h264-conformance-frext-hpcanl_brcm_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCANL_BRCM_C.264
+fate-h264-conformance-frext-hpcaq2lq_brcm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCAQ2LQ_BRCM_B.264
+fate-h264-conformance-frext-hpcv_brcm_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCV_BRCM_A.264
+fate-h264-conformance-frext-hpcvfl_bcrm_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCVFL_BRCM_A.264 -vsync 0
+fate-h264-conformance-frext-hpcvflnl_bcrm_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCVFLNL_BRCM_A.264 -vsync 0
+fate-h264-conformance-frext-hpcvmolq_brcm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCVMOLQ_BRCM_B.264
+fate-h264-conformance-frext-hpcvnl_brcm_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCVNL_BRCM_A.264
+fate-h264-conformance-frext-pph10i1_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I1_Panasonic_A.264 -pix_fmt yuv420p10le
+fate-h264-conformance-frext-pph10i2_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I2_Panasonic_A.264 -pix_fmt yuv420p10le
+fate-h264-conformance-frext-pph10i3_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I3_Panasonic_A.264 -pix_fmt yuv420p10le
+fate-h264-conformance-frext-pph10i4_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I4_Panasonic_A.264 -pix_fmt yuv420p10le
+fate-h264-conformance-frext-pph10i5_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I5_Panasonic_A.264 -pix_fmt yuv420p10le
+fate-h264-conformance-frext-pph10i6_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I6_Panasonic_A.264 -pix_fmt yuv420p10le
+fate-h264-conformance-frext-pph10i7_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I7_Panasonic_A.264 -pix_fmt yuv420p10le
 fate-h264-conformance-hcbp2_hhi_a: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/HCBP2_HHI_A.264
 fate-h264-conformance-hcmp1_hhi_a: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/HCMP1_HHI_A.264
-fate-h264-conformance-ls_sva_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/LS_SVA_D.264
-fate-h264-conformance-midr_mw_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/MIDR_MW_D.264
-fate-h264-conformance-mps_mw_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/MPS_MW_A.264
-fate-h264-conformance-mr1_bt_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/MR1_BT_A.h264
-fate-h264-conformance-mr1_mw_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/MR1_MW_A.264
-fate-h264-conformance-mr2_mw_a: CMD = framecrc  -i $(SAMPLES)/h264-conformance/MR2_MW_A.264
+fate-h264-conformance-ls_sva_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/LS_SVA_D.264
+fate-h264-conformance-midr_mw_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/MIDR_MW_D.264
+fate-h264-conformance-mps_mw_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/MPS_MW_A.264
+fate-h264-conformance-mr1_bt_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/MR1_BT_A.h264
+fate-h264-conformance-mr1_mw_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/MR1_MW_A.264
+fate-h264-conformance-mr2_mw_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/MR2_MW_A.264
 fate-h264-conformance-mr2_tandberg_e: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/MR2_TANDBERG_E.264
 fate-h264-conformance-mr3_tandberg_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/MR3_TANDBERG_B.264
 fate-h264-conformance-mr4_tandberg_c: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/MR4_TANDBERG_C.264
@@ -332,26 +332,26 @@ fate-h264-conformance-mr7_bt_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)
 fate-h264-conformance-mr8_bt_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/MR8_BT_B.h264
 fate-h264-conformance-mr9_bt_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/MR9_BT_B.h264
 fate-h264-conformance-mv1_brcm_d: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/src19td.IBP.264
-fate-h264-conformance-nl1_sony_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/NL1_Sony_D.jsv
-fate-h264-conformance-nl2_sony_h: CMD = framecrc  -i $(SAMPLES)/h264-conformance/NL2_Sony_H.jsv
-fate-h264-conformance-nl3_sva_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/NL3_SVA_E.264
-fate-h264-conformance-nlmq1_jvc_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/NLMQ1_JVC_C.264
-fate-h264-conformance-nlmq2_jvc_c: CMD = framecrc  -i $(SAMPLES)/h264-conformance/NLMQ2_JVC_C.264
-fate-h264-conformance-nrf_mw_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/NRF_MW_E.264
+fate-h264-conformance-nl1_sony_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NL1_Sony_D.jsv
+fate-h264-conformance-nl2_sony_h: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NL2_Sony_H.jsv
+fate-h264-conformance-nl3_sva_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NL3_SVA_E.264
+fate-h264-conformance-nlmq1_jvc_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NLMQ1_JVC_C.264
+fate-h264-conformance-nlmq2_jvc_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NLMQ2_JVC_C.264
+fate-h264-conformance-nrf_mw_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NRF_MW_E.264
 fate-h264-conformance-sharp_mp_field_1_b: CMD = framecrc  -vsync 0 -i $(SAMPLES)/h264-conformance/Sharp_MP_Field_1_B.jvt
 fate-h264-conformance-sharp_mp_field_2_b: CMD = framecrc  -vsync 0 -i $(SAMPLES)/h264-conformance/Sharp_MP_Field_2_B.jvt
 fate-h264-conformance-sharp_mp_field_3_b: CMD = framecrc  -vsync 0 -i $(SAMPLES)/h264-conformance/Sharp_MP_Field_3_B.jvt
 fate-h264-conformance-sharp_mp_paff_1r2: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/Sharp_MP_PAFF_1r2.jvt
 fate-h264-conformance-sharp_mp_paff_2r: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/Sharp_MP_PAFF_2.jvt
 fate-h264-conformance-sl1_sva_b: CMD = framecrc  -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/SL1_SVA_B.264
-fate-h264-conformance-sva_ba1_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/SVA_BA1_B.264
-fate-h264-conformance-sva_ba2_d: CMD = framecrc  -i $(SAMPLES)/h264-conformance/SVA_BA2_D.264
-fate-h264-conformance-sva_base_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/SVA_Base_B.264
-fate-h264-conformance-sva_cl1_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/SVA_CL1_E.264
-fate-h264-conformance-sva_fm1_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/SVA_FM1_E.264
-fate-h264-conformance-sva_nl1_b: CMD = framecrc  -i $(SAMPLES)/h264-conformance/SVA_NL1_B.264
-fate-h264-conformance-sva_nl2_e: CMD = framecrc  -i $(SAMPLES)/h264-conformance/SVA_NL2_E.264
+fate-h264-conformance-sva_ba1_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_BA1_B.264
+fate-h264-conformance-sva_ba2_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_BA2_D.264
+fate-h264-conformance-sva_base_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_Base_B.264
+fate-h264-conformance-sva_cl1_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_CL1_E.264
+fate-h264-conformance-sva_fm1_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_FM1_E.264
+fate-h264-conformance-sva_nl1_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_NL1_B.264
+fate-h264-conformance-sva_nl2_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_NL2_E.264
 
-fate-h264-interlace-crop: CMD = framecrc  -vframes 3 -i $(SAMPLES)/h264/interlaced_crop.mp4
-fate-h264-lossless: CMD = framecrc -i $(SAMPLES)/h264/lossless.h264
+fate-h264-interlace-crop: CMD = framecrc -vsync 0 -vframes 3 -i $(SAMPLES)/h264/interlaced_crop.mp4
+fate-h264-lossless: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264/lossless.h264
 fate-h264-extreme-plane-pred: CMD = framemd5 -strict 1 -vsync 0 -i $(SAMPLES)/h264/extreme-plane-pred.h264
diff --git a/tests/ref/vsynth1/error b/tests/ref/vsynth1/error
index c3543f9d36..7edef21c90 100644
--- a/tests/ref/vsynth1/error
+++ b/tests/ref/vsynth1/error
@@ -1,4 +1,4 @@
 7416dfd319f04044d4575dc9d1b406e1 *./tests/data/vsynth1/error-mpeg4-adv.avi
-756836 ./tests/data/vsynth1/error-mpeg4-adv.avi
-ef8bfcd6e0883daba95d0f32486ebe2d *./tests/data/error.vsynth1.out.yuv
-stddev:   18.05 PSNR: 23.00 MAXDIFF:  245 bytes:  7603200/  7603200
+  756836 ./tests/data/vsynth1/error-mpeg4-adv.avi
+54342963593ba08bcde95244a011efe5 *./tests/data/error.vsynth1.out.yuv
+stddev:   17.59 PSNR: 23.22 MAXDIFF:  240 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth2/error b/tests/ref/vsynth2/error
index 4181b2d299..99363f5a42 100644
--- a/tests/ref/vsynth2/error
+++ b/tests/ref/vsynth2/error
@@ -1,4 +1,4 @@
 90e65096aa9ebafa3fe3f44a5a47cdc4 *./tests/data/vsynth2/error-mpeg4-adv.avi
-176588 ./tests/data/vsynth2/error-mpeg4-adv.avi
-9fe1082179f80179439953c7397a46ef *./tests/data/error.vsynth2.out.yuv
-stddev:    9.00 PSNR: 29.04 MAXDIFF:  168 bytes:  7603200/  7603200
+  176588 ./tests/data/vsynth2/error-mpeg4-adv.avi
+ce12aa852126f2740838dd2da9e21a03 *./tests/data/error.vsynth2.out.yuv
+stddev:   10.06 PSNR: 28.08 MAXDIFF:  193 bytes:  7603200/  7603200
diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh
index 933aa648d6..4cf2e20fd8 100755
--- a/tests/regression-funcs.sh
+++ b/tests/regression-funcs.sh
@@ -102,7 +102,7 @@ do_ffmpeg_crc()
 
 do_video_decoding()
 {
-    do_ffmpeg $raw_dst $DEC_OPTS $1 -i $target_path/$file -f rawvideo $ENC_OPTS $2
+    do_ffmpeg $raw_dst $DEC_OPTS $1 -i $target_path/$file -f rawvideo $ENC_OPTS -vsync 0 $2
 }
 
 do_video_encoding()

From 808d8ff6bb92e641cdd99a0b06767eabd707b925 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 11:25:56 -0700
Subject: [PATCH 512/830] swscale: allocate larger buffer to handle altivec
 overreads.

Altivec sws code intentionally overreads buffers for better performance,
so we need to allocate larger buffers to handle that.
---
 libswscale/utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/utils.c b/libswscale/utils.c
index a2a6a1e191..827abc66d2 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -749,7 +749,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     int srcH= c->srcH;
     int dstW= c->dstW;
     int dstH= c->dstH;
-    int dst_stride = FFALIGN(dstW * sizeof(int16_t), 16), dst_stride_px = dst_stride >> 1;
+    int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 16, 16), dst_stride_px = dst_stride >> 1;
     int flags, cpu_flags;
     enum PixelFormat srcFormat= c->srcFormat;
     enum PixelFormat dstFormat= c->dstFormat;

From 5ad38d93e86e66b29b329c04cdc86f22a96b7db3 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 2 Jun 2011 16:09:22 +0200
Subject: [PATCH 513/830] cpudetect: add av_force_cpu_flags()

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavutil/avutil.h | 2 +-
 libavutil/cpu.c    | 9 +++++++--
 libavutil/cpu.h    | 7 +++++++
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index e3bc6c72fa..cdd4f7131c 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,7 +40,7 @@
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR  3
+#define LIBAVUTIL_VERSION_MINOR  4
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 66262b9afd..c439a830c5 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -19,10 +19,15 @@
 #include "cpu.h"
 #include "config.h"
 
+static int flags, checked;
+
+void av_force_cpu_flags(int arg){
+    flags   = arg;
+    checked = 1;
+}
+
 int av_get_cpu_flags(void)
 {
-    static int flags, checked;
-
     if (checked)
         return flags;
 
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 4ec3a27d7c..da93fb5f69 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -46,6 +46,13 @@
  */
 int av_get_cpu_flags(void);
 
+
+/**
+ * Disables cpu detection and forces the specified flags.
+ */
+void av_force_cpu_flags(int flags);
+
+
 /* The following CPU-specific functions shall not be called directly. */
 int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);

From 877f76ad33bb9b0b0d09565dd9ec1cf8e91096f1 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 2 Jun 2011 19:26:40 +0200
Subject: [PATCH 514/830] swscale: Fix use of uninitialized values (bug
 probably introduced from a merge of libav)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale_template.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 3555883f0f..6c85487e9c 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -419,6 +419,8 @@ static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
         dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
         xpos+=xInc;
     }
+    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+        dst[i] = src[srcW-1]*128;
 }
 
       // *** horizontal scale Y line to temp buffer
@@ -465,6 +467,10 @@ static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
         dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
         xpos+=xInc;
     }
+    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+        dst1[i] = src1[srcW-1]*128;
+        dst2[i] = src2[srcW-1]*128;
+    }
 }
 
 inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,

From 6713989c231104ff4381ef58f25ec1af8603535b Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 2 Jun 2011 04:41:44 +0200
Subject: [PATCH 515/830] swscale: dither for planar yuv outputs

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale.c              | 26 +++++++------
 libswscale/swscale_internal.h     | 10 +++--
 libswscale/swscale_template.c     | 31 ++++++++-------
 libswscale/x86/swscale_template.c | 64 ++++++++++++++++++-------------
 4 files changed, 77 insertions(+), 54 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 6bf7fcfd9d..65bb7c01c1 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -282,6 +282,8 @@ DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
+static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
+
 uint16_t dither_scale[15][16]={
 {    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
 {    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
@@ -417,12 +419,13 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr
 static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                const int16_t *chrFilter, const int16_t **chrUSrc,
                                const int16_t **chrVSrc, int chrFilterSize,
-                               const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
+                               const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
+                               const uint8_t *lumDither, const uint8_t *chrDither)
 {
     //FIXME Optimize (just quickly written not optimized..)
     int i;
     for (i=0; i<dstW; i++) {
-        int val=1<<18;
+        int val = lumDither[i&7] << 12;
         int j;
         for (j=0; j<lumFilterSize; j++)
             val += lumSrc[j][i] * lumFilter[j];
@@ -432,8 +435,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
 
     if (uDest)
         for (i=0; i<chrDstW; i++) {
-            int u=1<<18;
-            int v=1<<18;
+            int u = chrDither[i&7] << 12;
+            int v = chrDither[(i+3)&7] << 12;
             int j;
             for (j=0; j<chrFilterSize; j++) {
                 u += chrUSrc[j][i] * chrFilter[j];
@@ -446,7 +449,7 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
 
     if (CONFIG_SWSCALE_ALPHA && aDest)
         for (i=0; i<dstW; i++) {
-            int val=1<<18;
+            int val = lumDither[i&7] << 12;
             int j;
             for (j=0; j<lumFilterSize; j++)
                 val += alpSrc[j][i] * lumFilter[j];
@@ -459,12 +462,13 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
 static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                 const int16_t *chrFilter, const int16_t **chrUSrc,
                                 const int16_t **chrVSrc, int chrFilterSize,
-                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
+                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat,
+                                const uint8_t *lumDither, const uint8_t *chrDither)
 {
     //FIXME Optimize (just quickly written not optimized..)
     int i;
     for (i=0; i<dstW; i++) {
-        int val=1<<18;
+        int val = lumDither[i&7]<<12;
         int j;
         for (j=0; j<lumFilterSize; j++)
             val += lumSrc[j][i] * lumFilter[j];
@@ -477,8 +481,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
 
     if (dstFormat == PIX_FMT_NV12)
         for (i=0; i<chrDstW; i++) {
-            int u=1<<18;
-            int v=1<<18;
+            int u = chrDither[i&7]<<12;
+            int v = chrDither[(i+3)&7]<<12;
             int j;
             for (j=0; j<chrFilterSize; j++) {
                 u += chrUSrc[j][i] * chrFilter[j];
@@ -490,8 +494,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         }
     else
         for (i=0; i<chrDstW; i++) {
-            int u=1<<18;
-            int v=1<<18;
+            int u = chrDither[i&7]<<12;
+            int v = chrDither[(i+3)&7]<<12;
             int j;
             for (j=0; j<chrFilterSize; j++) {
                 u += chrUSrc[j][i] * chrFilter[j];
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 8577448c38..87712be1b3 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -195,6 +195,8 @@ typedef struct SwsContext {
 #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
 #define UV_OFF                "11*8+4*4*256*3+48"
 #define UV_OFFx2              "11*8+4*4*256*3+56"
+#define DITHER16              "11*8+4*4*256*3+64"
+#define DITHER32              "11*8+4*4*256*3+64+16"
 
     DECLARE_ALIGNED(8, uint64_t, redDither);
     DECLARE_ALIGNED(8, uint64_t, greenDither);
@@ -219,6 +221,8 @@ typedef struct SwsContext {
     int32_t  alpMmxFilter[4*MAX_FILTER_SIZE];
     DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
     DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
+    uint16_t dither16[8];
+    uint32_t dither32[8];
 
 #if HAVE_ALTIVEC
     vector signed short   CY;
@@ -255,13 +259,13 @@ typedef struct SwsContext {
                         const int16_t *chrFilter, const int16_t **chrUSrc,
                         const int16_t **chrVSrc, int chrFilterSize,
                         uint8_t *dest, uint8_t *uDest,
-                        int dstW, int chrDstW, int dstFormat);
+                        int dstW, int chrDstW, int dstFormat, const uint8_t *lumDither, const uint8_t *chrDither);
     void (*yuv2yuv1   )(struct SwsContext *c,
                         const int16_t *lumSrc, const int16_t *chrUSrc,
                         const int16_t *chrVSrc, const int16_t *alpSrc,
                         uint8_t *dest,
                         uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        int dstW, int chrDstW);
+                        int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither);
     void (*yuv2yuvX   )(struct SwsContext *c,
                         const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                         const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -269,7 +273,7 @@ typedef struct SwsContext {
                         const int16_t **alpSrc,
                         uint8_t *dest,
                         uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        int dstW, int chrDstW);
+                        int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither);
     void (*yuv2packed1)(struct SwsContext *c,
                         const uint16_t *buf0,
                         const uint16_t *ubuf0, const uint16_t *ubuf1,
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 6c85487e9c..58e05ac7b3 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -24,11 +24,11 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                               const int16_t **chrVSrc,
                               int chrFilterSize, const int16_t **alpSrc,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, int dstW, int chrDstW)
+                              uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
 {
     yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
                 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
+                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
 }
 
 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
@@ -36,36 +36,37 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                                const int16_t *chrFilter, const int16_t **chrUSrc,
                                const int16_t **chrVSrc,
                                int chrFilterSize, uint8_t *dest, uint8_t *uDest,
-                               int dstW, int chrDstW, enum PixelFormat dstFormat)
+                               int dstW, int chrDstW, enum PixelFormat dstFormat, const uint8_t *dither, const uint8_t *chrDither)
 {
     yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
                  chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                 dest, uDest, dstW, chrDstW, dstFormat);
+                 dest, uDest, dstW, chrDstW, dstFormat, dither, chrDither);
 }
 
 static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
                               const int16_t *chrUSrc, const int16_t *chrVSrc,
                               const int16_t *alpSrc,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, int dstW, int chrDstW)
+                              uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
 {
     int i;
+
     for (i=0; i<dstW; i++) {
-        int val= (lumSrc[i]+64)>>7;
+        int val= (lumSrc[i]+lumDither[i&7])>>7;
         dest[i]= av_clip_uint8(val);
     }
 
     if (uDest)
         for (i=0; i<chrDstW; i++) {
-            int u=(chrUSrc[i]+64)>>7;
-            int v=(chrVSrc[i]+64)>>7;
+            int u=(chrUSrc[i]+chrDither[i&7])>>7;
+            int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7;
             uDest[i]= av_clip_uint8(u);
             vDest[i]= av_clip_uint8(v);
         }
 
     if (CONFIG_SWSCALE_ALPHA && aDest)
         for (i=0; i<dstW; i++) {
-            int val= (alpSrc[i]+64)>>7;
+            int val= (alpSrc[i]+lumDither[i&7])>>7;
             aDest[i]= av_clip_uint8(val);
         }
 }
@@ -609,6 +610,8 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
         unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
         unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
         unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
+        const uint8_t *lumDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][dstY   &7] : flat64;
+        const uint8_t *chrDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][chrDstY&7] : flat64;
 
         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
@@ -699,7 +702,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                 c->yuv2nv12X(c,
                              vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                              vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
+                             dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither);
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
@@ -716,13 +719,13 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                     const int16_t *chrVBuf= chrVSrcPtr[0];
                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                     c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
-                                uDest, vDest, aDest, dstW, chrDstW);
+                                uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
                 } else { //General YV12
                     c->yuv2yuvX(c,
                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
                                 chrVSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
+                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
                 }
             } else {
                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
@@ -784,7 +787,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                 yuv2nv12XinC(
                              vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                              vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
+                             dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither);
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
@@ -798,7 +801,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                     yuv2yuvXinC(
                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
+                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
                 }
             } else {
                 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index e2a530ae8d..d726c175f6 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -39,8 +39,8 @@
 
 #define YSCALEYUV2YV12X(offset, dest, end, pos) \
     __asm__ volatile(\
-        "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
-        "movq                             %%mm3, %%mm4      \n\t"\
+        "movq                  "DITHER16"+0(%0), %%mm3      \n\t"\
+        "movq                  "DITHER16"+8(%0), %%mm4      \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         ".p2align                             4             \n\t" /* FIXME Unroll? */\
@@ -62,8 +62,8 @@
         MOVNTQ(%%mm3, (%1, %3))\
         "add                                 $8, %3         \n\t"\
         "cmp                                 %2, %3         \n\t"\
-        "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
-        "movq                             %%mm3, %%mm4      \n\t"\
+        "movq                  "DITHER16"+0(%0), %%mm3      \n\t"\
+        "movq                  "DITHER16"+8(%0), %%mm4      \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         "jb                                  1b             \n\t"\
@@ -78,13 +78,18 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                     const int16_t **chrVSrc,
                                     int chrFilterSize, const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, int dstW, int chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW,
+                                    const uint8_t *lumDither, const uint8_t *chrDither)
 {
+    int i;
     if (uDest) {
         x86_reg uv_off = c->uv_off;
+        for(i=0; i<8; i++) c->dither16[i] = chrDither[i]>>4;
         YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
+        for(i=0; i<8; i++) c->dither16[i] = chrDither[(i+3)&7]>>4;
         YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
     }
+    for(i=0; i<8; i++) c->dither16[i] = lumDither[i]>>4;
     if (CONFIG_SWSCALE_ALPHA && aDest) {
         YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
     }
@@ -95,6 +100,10 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
 #define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
     __asm__ volatile(\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
+        "movq                  "DITHER32"+0(%0), %%mm4      \n\t"\
+        "movq                  "DITHER32"+8(%0), %%mm5      \n\t"\
+        "movq                 "DITHER32"+16(%0), %%mm6      \n\t"\
+        "movq                 "DITHER32"+24(%0), %%mm7      \n\t"\
         "pxor                             %%mm4, %%mm4      \n\t"\
         "pxor                             %%mm5, %%mm5      \n\t"\
         "pxor                             %%mm6, %%mm6      \n\t"\
@@ -126,26 +135,21 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "paddd                            %%mm2, %%mm6      \n\t"\
         "paddd                            %%mm0, %%mm7      \n\t"\
         " jnz                                1b             \n\t"\
-        "psrad                              $16, %%mm4      \n\t"\
-        "psrad                              $16, %%mm5      \n\t"\
-        "psrad                              $16, %%mm6      \n\t"\
-        "psrad                              $16, %%mm7      \n\t"\
-        "movq             "VROUNDER_OFFSET"(%0), %%mm0      \n\t"\
+        "psrad                              $19, %%mm4      \n\t"\
+        "psrad                              $19, %%mm5      \n\t"\
+        "psrad                              $19, %%mm6      \n\t"\
+        "psrad                              $19, %%mm7      \n\t"\
         "packssdw                         %%mm5, %%mm4      \n\t"\
         "packssdw                         %%mm7, %%mm6      \n\t"\
-        "paddw                            %%mm0, %%mm4      \n\t"\
-        "paddw                            %%mm0, %%mm6      \n\t"\
-        "psraw                               $3, %%mm4      \n\t"\
-        "psraw                               $3, %%mm6      \n\t"\
         "packuswb                         %%mm6, %%mm4      \n\t"\
         MOVNTQ(%%mm4, (%1, %3))\
         "add                                 $8, %3         \n\t"\
         "cmp                                 %2, %3         \n\t"\
         "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-        "pxor                             %%mm4, %%mm4      \n\t"\
-        "pxor                             %%mm5, %%mm5      \n\t"\
-        "pxor                             %%mm6, %%mm6      \n\t"\
-        "pxor                             %%mm7, %%mm7      \n\t"\
+        "movq                  "DITHER32"+0(%0), %%mm4      \n\t"\
+        "movq                  "DITHER32"+8(%0), %%mm5      \n\t"\
+        "movq                 "DITHER32"+16(%0), %%mm6      \n\t"\
+        "movq                 "DITHER32"+24(%0), %%mm7      \n\t"\
         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
         "jb                                  1b             \n\t"\
         :: "r" (&c->redDither),\
@@ -159,13 +163,18 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
                                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, int dstW, int chrDstW)
+                                       uint8_t *aDest, int dstW, int chrDstW,
+                                       const uint8_t *lumDither, const uint8_t *chrDither)
 {
+    int i;
     if (uDest) {
         x86_reg uv_off = c->uv_off;
+        for(i=0; i<8; i++) c->dither32[i] = chrDither[i]<<12;
         YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
+        for(i=0; i<8; i++) c->dither32[i] = chrDither[(i+3)&7]<<12;
         YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
     }
+    for(i=0; i<8; i++) c->dither32[i] = lumDither[i]<<12;
     if (CONFIG_SWSCALE_ALPHA && aDest) {
         YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
     }
@@ -190,7 +199,8 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
                                     const int16_t *chrUSrc, const int16_t *chrVSrc,
                                     const int16_t *alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, int dstW, int chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW,
+                                    const uint8_t *lumDither, const uint8_t *chrDither)
 {
     int p= 4;
     const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
@@ -211,14 +221,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
 
 #define YSCALEYUV2YV121_ACCURATE \
     "mov %2, %%"REG_a"                    \n\t"\
-    "pcmpeqw %%mm7, %%mm7                 \n\t"\
-    "psrlw                 $15, %%mm7     \n\t"\
-    "psllw                  $6, %%mm7     \n\t"\
+    "movq               0(%3), %%mm6      \n\t"\
+    "movq               8(%3), %%mm7      \n\t"\
     ".p2align                4            \n\t" /* FIXME Unroll? */\
     "1:                                   \n\t"\
     "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
     "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
-    "paddsw             %%mm7, %%mm0      \n\t"\
+    "paddsw             %%mm6, %%mm0      \n\t"\
     "paddsw             %%mm7, %%mm1      \n\t"\
     "psraw                 $7, %%mm0      \n\t"\
     "psraw                 $7, %%mm1      \n\t"\
@@ -231,7 +240,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
                                        const int16_t *chrUSrc, const int16_t *chrVSrc,
                                        const int16_t *alpSrc,
                                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, int dstW, int chrDstW)
+                                       uint8_t *aDest, int dstW, int chrDstW,
+                                       const uint8_t *lumDither, const uint8_t *chrDither)
 {
     int p= 4;
     const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
@@ -240,10 +250,12 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
 
     while (p--) {
         if (dst[p]) {
+            int i;
+            for(i=0; i<8; i++) c->dither16[i] = i<2 ? lumDither[i] : chrDither[i];
             __asm__ volatile(
                 YSCALEYUV2YV121_ACCURATE
                 :: "r" (src[p]), "r" (dst[p] + counter[p]),
-                   "g" (-counter[p])
+                   "g" (-counter[p]), "r"(c->dither16)
                 : "%"REG_a
             );
         }

From e65ab9d94f1c8d8893e32d90467d9525625d306a Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 1 Jun 2011 17:26:27 +0100
Subject: [PATCH 516/830] Remove unused variables

---
 ffmpeg.c                      |  8 +++-----
 libavcodec/a64multienc.c      |  2 --
 libavcodec/aaccoder.c         |  9 +++------
 libavcodec/celp_filters.c     |  3 +--
 libavcodec/dca.c              |  6 ++----
 libavcodec/dirac.c            |  4 ++--
 libavcodec/ituh263dec.c       |  6 +++---
 libavcodec/mpeg12.c           |  3 +--
 libavcodec/mpeg12enc.c        |  3 +--
 libavcodec/mpeg4videodec.c    | 19 +++++++------------
 libavcodec/mpegvideo_parser.c |  3 +--
 libavcodec/msmpeg4.c          |  4 +---
 libavcodec/pngdec.c           | 13 ++++++-------
 libavcodec/rv10.c             |  4 ++--
 libavcodec/s302m.c            |  3 +--
 libavcodec/shorten.c          |  3 +--
 libavcodec/sp5xdec.c          |  3 ---
 libavcodec/svq1dec.c          |  3 +--
 libavcodec/targa.c            |  6 +++---
 libavcodec/truemotion2.c      | 14 --------------
 libavcodec/tscc.c             |  3 ---
 libavcodec/vmdav.c            |  2 --
 libavcodec/zmbvenc.c          |  4 ++--
 libavfilter/vf_fieldorder.c   |  3 +--
 libavformat/4xm.c             |  4 ++--
 libavformat/apetag.c          |  4 ++--
 libavformat/asfdec.c          | 19 ++++++++-----------
 libavformat/avidec.c          |  4 ++--
 libavformat/gif.c             |  4 ----
 libavformat/mmf.c             |  6 ++----
 libavformat/mov.c             |  6 +++---
 libavformat/oggdec.c          |  5 +----
 libavformat/oggparseogm.c     |  4 +---
 libavformat/rl2.c             |  6 ++----
 libavformat/rmdec.c           |  4 ++--
 libavformat/rpl.c             |  4 ++--
 libavformat/rtpdec_latm.c     |  5 ++---
 libavformat/sauce.c           |  4 ++--
 libavformat/smacker.c         |  2 --
 libavformat/sol.c             |  3 +--
 libavformat/yuv4mpeg.c        |  4 ++--
 41 files changed, 76 insertions(+), 143 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 7721438ad1..15ac03e3ed 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1117,7 +1117,7 @@ static void do_video_out(AVFormatContext *s,
                          int *frame_size)
 {
     int nb_frames, i, ret, resample_changed;
-    AVFrame *final_picture, *formatted_picture, *resampling_dst;
+    AVFrame *final_picture, *formatted_picture;
     AVCodecContext *enc, *dec;
     double sync_ipts;
 
@@ -1162,7 +1162,6 @@ static void do_video_out(AVFormatContext *s,
 
     formatted_picture = in_picture;
     final_picture = formatted_picture;
-    resampling_dst = &ost->pict_tmp;
 
     resample_changed = ost->resample_width   != dec->width  ||
                        ost->resample_height  != dec->height ||
@@ -1198,7 +1197,7 @@ static void do_video_out(AVFormatContext *s,
             }
         }
         sws_scale(ost->img_resample_ctx, formatted_picture->data, formatted_picture->linesize,
-              0, ost->resample_height, resampling_dst->data, resampling_dst->linesize);
+              0, ost->resample_height, final_picture->data, final_picture->linesize);
     }
 #endif
 
@@ -3617,7 +3616,6 @@ static void new_audio_stream(AVFormatContext *oc, int file_idx)
 static void new_data_stream(AVFormatContext *oc, int file_idx)
 {
     AVStream *st;
-    AVOutputStream *ost;
     AVCodec *codec=NULL;
     AVCodecContext *data_enc;
 
@@ -3626,7 +3624,7 @@ static void new_data_stream(AVFormatContext *oc, int file_idx)
         fprintf(stderr, "Could not alloc stream\n");
         ffmpeg_exit(1);
     }
-    ost = new_output_stream(oc, file_idx);
+    new_output_stream(oc, file_idx);
     data_enc = st->codec;
     output_codecs = grow_array(output_codecs, sizeof(*output_codecs), &nb_output_codecs, nb_output_codecs + 1);
     if (!data_stream_copy) {
diff --git a/libavcodec/a64multienc.c b/libavcodec/a64multienc.c
index 8286d7ee3c..532f2a2824 100644
--- a/libavcodec/a64multienc.c
+++ b/libavcodec/a64multienc.c
@@ -252,7 +252,6 @@ static int a64multi_encode_frame(AVCodecContext *avctx, unsigned char *buf,
     int b_width;
 
     int req_size;
-    int num_frames   = c->mc_lifetime;
 
     int *charmap     = c->mc_charmap;
     uint8_t *colram  = c->mc_colram;
@@ -280,7 +279,6 @@ static int a64multi_encode_frame(AVCodecContext *avctx, unsigned char *buf,
         if (!c->mc_lifetime) return 0;
         /* no more frames in queue, prepare to flush remaining frames */
         if (!c->mc_frame_counter) {
-            num_frames = c->mc_lifetime;
             c->mc_lifetime = 0;
         }
         /* still frames in queue so limit lifetime to remaining frames */
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 83d3734089..4d5b98fa63 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -311,7 +311,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
                                      int win, int group_len, const float lambda)
 {
     BandCodingPath path[120][12];
-    int w, swb, cb, start, start2, size;
+    int w, swb, cb, start, size;
     int i, j;
     const int max_sfb  = sce->ics.max_sfb;
     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
@@ -329,7 +329,6 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
         path[0][cb].run      = 0;
     }
     for (swb = 0; swb < max_sfb; swb++) {
-        start2 = start;
         size = sce->ics.swb_sizes[swb];
         if (sce->zeroes[win*16 + swb]) {
             for (cb = 0; cb < 12; cb++) {
@@ -413,7 +412,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
                                   int win, int group_len, const float lambda)
 {
     BandCodingPath path[120][12];
-    int w, swb, cb, start, start2, size;
+    int w, swb, cb, start, size;
     int i, j;
     const int max_sfb  = sce->ics.max_sfb;
     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
@@ -431,7 +430,6 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
         path[0][cb].run      = 0;
     }
     for (swb = 0; swb < max_sfb; swb++) {
-        start2 = start;
         size = sce->ics.swb_sizes[swb];
         if (sce->zeroes[win*16 + swb]) {
             for (cb = 0; cb < 12; cb++) {
@@ -1006,12 +1004,11 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
                                        SingleChannelElement *sce,
                                        const float lambda)
 {
-    int start = 0, i, w, w2, g;
+    int i, w, w2, g;
     int minq = 255;
 
     memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-        start = w*128;
         for (g = 0; g < sce->ics.num_swb; g++) {
             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
diff --git a/libavcodec/celp_filters.c b/libavcodec/celp_filters.c
index 32eaff3c75..25a6744b04 100644
--- a/libavcodec/celp_filters.c
+++ b/libavcodec/celp_filters.c
@@ -109,7 +109,7 @@ void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
     old_out2 = out[-2];
     old_out3 = out[-1];
     for (n = 0; n <= buffer_length - 4; n+=4) {
-        float tmp0,tmp1,tmp2,tmp3;
+        float tmp0,tmp1,tmp2;
         float val;
 
         out0 = in[0];
@@ -160,7 +160,6 @@ void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
         tmp0 = out0;
         tmp1 = out1;
         tmp2 = out2;
-        tmp3 = out3;
 
         out3 -= a * tmp2;
         out2 -= a * tmp1;
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index dbadebacff..a9b2c9b0c9 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -1535,8 +1535,6 @@ static void dca_exss_parse_header(DCAContext *s)
 {
     int ss_index;
     int blownup;
-    int header_size;
-    int hd_size;
     int num_audiop = 1;
     int num_assets = 1;
     int active_ss_mask[8];
@@ -1549,8 +1547,8 @@ static void dca_exss_parse_header(DCAContext *s)
     ss_index = get_bits(&s->gb, 2);
 
     blownup = get_bits1(&s->gb);
-    header_size = get_bits(&s->gb, 8 + 4 * blownup) + 1;
-    hd_size = get_bits_long(&s->gb, 16 + 4 * blownup) + 1;
+    skip_bits(&s->gb, 8 + 4 * blownup); // header_size
+    skip_bits(&s->gb, 16 + 4 * blownup); // hd_size
 
     s->static_fields = get_bits1(&s->gb);
     if (s->static_fields) {
diff --git a/libavcodec/dirac.c b/libavcodec/dirac.c
index 0256f29a53..09da1cbd56 100644
--- a/libavcodec/dirac.c
+++ b/libavcodec/dirac.c
@@ -245,11 +245,11 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
 int ff_dirac_parse_sequence_header(AVCodecContext *avctx, GetBitContext *gb,
                                    dirac_source_params *source)
 {
-    unsigned version_major, version_minor;
+    unsigned version_major;
     unsigned video_format, picture_coding_mode;
 
     version_major  = svq3_get_ue_golomb(gb);
-    version_minor  = svq3_get_ue_golomb(gb);
+    svq3_get_ue_golomb(gb); /* version_minor */
     avctx->profile = svq3_get_ue_golomb(gb);
     avctx->level   = svq3_get_ue_golomb(gb);
     video_format   = svq3_get_ue_golomb(gb);
diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c
index aebfcee027..0bb92da833 100644
--- a/libavcodec/ituh263dec.c
+++ b/libavcodec/ituh263dec.c
@@ -152,7 +152,7 @@ int ff_h263_decode_mba(MpegEncContext *s)
  */
 static int h263_decode_gob_header(MpegEncContext *s)
 {
-    unsigned int val, gfid, gob_number;
+    unsigned int val, gob_number;
     int left;
 
     /* Check for GOB Start Code */
@@ -183,12 +183,12 @@ static int h263_decode_gob_header(MpegEncContext *s)
         s->qscale = get_bits(&s->gb, 5); /* SQUANT */
         if(get_bits1(&s->gb)==0)
             return -1;
-        gfid = get_bits(&s->gb, 2); /* GFID */
+        skip_bits(&s->gb, 2); /* GFID */
     }else{
         gob_number = get_bits(&s->gb, 5); /* GN */
         s->mb_x= 0;
         s->mb_y= s->gob_index* gob_number;
-        gfid = get_bits(&s->gb, 2); /* GFID */
+        skip_bits(&s->gb, 2); /* GFID */
         s->qscale = get_bits(&s->gb, 5); /* GQUANT */
     }
 
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 82c2987b90..0e1536f7ab 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -2128,14 +2128,13 @@ static void mpeg_decode_gop(AVCodecContext *avctx,
     Mpeg1Context *s1 = avctx->priv_data;
     MpegEncContext *s = &s1->mpeg_enc_ctx;
 
-    int drop_frame_flag;
     int time_code_hours, time_code_minutes;
     int time_code_seconds, time_code_pictures;
     int broken_link;
 
     init_get_bits(&s->gb, buf, buf_size*8);
 
-    drop_frame_flag = get_bits1(&s->gb);
+    skip_bits1(&s->gb); /* drop_frame_flag */
 
     time_code_hours=get_bits(&s->gb,5);
     time_code_minutes = get_bits(&s->gb,6);
diff --git a/libavcodec/mpeg12enc.c b/libavcodec/mpeg12enc.c
index 8aa31f7581..eb07ecfc5e 100644
--- a/libavcodec/mpeg12enc.c
+++ b/libavcodec/mpeg12enc.c
@@ -761,10 +761,9 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
 
                 if(mv==0) len= ff_mpeg12_mbMotionVectorTable[0][1];
                 else{
-                    int val, bit_size, range, code;
+                    int val, bit_size, code;
 
                     bit_size = f_code - 1;
-                    range = 1 << bit_size;
 
                     val=mv;
                     if (val < 0)
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index 81f09c5a4b..22d7ace789 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -397,14 +397,13 @@ int mpeg4_decode_video_packet_header(MpegEncContext *s)
         header_extension= get_bits1(&s->gb);
     }
     if(header_extension){
-        int time_increment;
         int time_incr=0;
 
         while (get_bits1(&s->gb) != 0)
             time_incr++;
 
         check_marker(&s->gb, "before time_increment in video packed header");
-        time_increment= get_bits(&s->gb, s->time_increment_bits);
+        skip_bits(&s->gb, s->time_increment_bits); /* time_increment */
         check_marker(&s->gb, "before vop_coding_type in video packed header");
 
         skip_bits(&s->gb, 2); /* vop coding type */
@@ -1801,16 +1800,14 @@ no_cplx_est:
 
         if (s->scalability) {
             GetBitContext bak= *gb;
-            int ref_layer_id;
-            int ref_layer_sampling_dir;
             int h_sampling_factor_n;
             int h_sampling_factor_m;
             int v_sampling_factor_n;
             int v_sampling_factor_m;
 
             s->hierachy_type= get_bits1(gb);
-            ref_layer_id= get_bits(gb, 4);
-            ref_layer_sampling_dir= get_bits1(gb);
+            skip_bits(gb, 4);  /* ref_layer_id */
+            skip_bits1(gb);    /* ref_layer_sampling_dir */
             h_sampling_factor_n= get_bits(gb, 5);
             h_sampling_factor_m= get_bits(gb, 5);
             v_sampling_factor_n= get_bits(gb, 5);
@@ -1989,15 +1986,13 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
 
      if (s->shape != RECT_SHAPE) {
          if (s->vol_sprite_usage != 1 || s->pict_type != AV_PICTURE_TYPE_I) {
-             int width, height, hor_spat_ref, ver_spat_ref;
-
-             width = get_bits(gb, 13);
+             skip_bits(gb, 13); /* width */
              skip_bits1(gb);   /* marker */
-             height = get_bits(gb, 13);
+             skip_bits(gb, 13); /* height */
              skip_bits1(gb);   /* marker */
-             hor_spat_ref = get_bits(gb, 13); /* hor_spat_ref */
+             skip_bits(gb, 13); /* hor_spat_ref */
              skip_bits1(gb);   /* marker */
-             ver_spat_ref = get_bits(gb, 13); /* ver_spat_ref */
+             skip_bits(gb, 13); /* ver_spat_ref */
          }
          skip_bits1(gb); /* change_CR_disable */
 
diff --git a/libavcodec/mpegvideo_parser.c b/libavcodec/mpegvideo_parser.c
index 42c85874f6..9688e18625 100644
--- a/libavcodec/mpegvideo_parser.c
+++ b/libavcodec/mpegvideo_parser.c
@@ -32,7 +32,7 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
     uint32_t start_code;
     int frame_rate_index, ext_type, bytes_left;
     int frame_rate_ext_n, frame_rate_ext_d;
-    int picture_structure, top_field_first, repeat_first_field, progressive_frame;
+    int top_field_first, repeat_first_field, progressive_frame;
     int horiz_size_ext, vert_size_ext, bit_rate_ext;
     int did_set_size=0;
 //FIXME replace the crap with get_bits()
@@ -91,7 +91,6 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
                     break;
                 case 0x8: /* picture coding extension */
                     if (bytes_left >= 5) {
-                        picture_structure = buf[2]&3;
                         top_field_first = buf[3] & (1 << 7);
                         repeat_first_field = buf[3] & (1 << 1);
                         progressive_frame = buf[4] & (1 << 7);
diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c
index c575a2f206..c740cfa0bd 100644
--- a/libavcodec/msmpeg4.c
+++ b/libavcodec/msmpeg4.c
@@ -1528,9 +1528,7 @@ int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size)
     /* the alt_bitstream reader could read over the end so we need to check it */
     if(left>=length && left<length+8)
     {
-        int fps;
-
-        fps= get_bits(&s->gb, 5);
+        skip_bits(&s->gb, 5); /* fps */
         s->bit_rate= get_bits(&s->gb, 11)*1024;
         if(s->msmpeg4_version>=3)
             s->flipflop_rounding= get_bits1(&s->gb);
diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index 39dbfdcc6f..7477f6746b 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -397,7 +397,7 @@ static int decode_frame(AVCodecContext *avctx,
     AVFrame *p;
     uint8_t *crow_buf_base = NULL;
     uint32_t tag, length;
-    int ret, crc;
+    int ret;
 
     FFSWAP(AVFrame *, s->current_picture, s->last_picture);
     avctx->coded_frame= s->current_picture;
@@ -451,7 +451,7 @@ static int decode_frame(AVCodecContext *avctx,
             s->compression_type = *s->bytestream++;
             s->filter_type = *s->bytestream++;
             s->interlace_type = *s->bytestream++;
-            crc = bytestream_get_be32(&s->bytestream);
+            s->bytestream += 4; /* crc */
             s->state |= PNG_IHDR;
             av_dlog(avctx, "width=%d height=%d depth=%d color_type=%d compression_type=%d filter_type=%d interlace_type=%d\n",
                     s->width, s->height, s->bit_depth, s->color_type,
@@ -547,8 +547,7 @@ static int decode_frame(AVCodecContext *avctx,
             s->state |= PNG_IDAT;
             if (png_decode_idat(s, length) < 0)
                 goto fail;
-            /* skip crc */
-            crc = bytestream_get_be32(&s->bytestream);
+            s->bytestream += 4; /* crc */
             break;
         case MKTAG('P', 'L', 'T', 'E'):
             {
@@ -568,7 +567,7 @@ static int decode_frame(AVCodecContext *avctx,
                     s->palette[i] = (0xff << 24);
                 }
                 s->state |= PNG_PLTE;
-                crc = bytestream_get_be32(&s->bytestream);
+                s->bytestream += 4; /* crc */
             }
             break;
         case MKTAG('t', 'R', 'N', 'S'):
@@ -584,13 +583,13 @@ static int decode_frame(AVCodecContext *avctx,
                     v = *s->bytestream++;
                     s->palette[i] = (s->palette[i] & 0x00ffffff) | (v << 24);
                 }
-                crc = bytestream_get_be32(&s->bytestream);
+                s->bytestream += 4; /* crc */
             }
             break;
         case MKTAG('I', 'E', 'N', 'D'):
             if (!(s->state & PNG_ALLIMAGE))
                 goto fail;
-            crc = bytestream_get_be32(&s->bytestream);
+            s->bytestream += 4; /* crc */
             goto exit_loop;
         default:
             /* skip tag */
diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c
index d85cf37cac..6227dc6f6c 100644
--- a/libavcodec/rv10.c
+++ b/libavcodec/rv10.c
@@ -235,7 +235,7 @@ int rv_decode_dc(MpegEncContext *s, int n)
 /* read RV 1.0 compatible frame header */
 static int rv10_decode_picture_header(MpegEncContext *s)
 {
-    int mb_count, pb_frame, marker, unk, mb_xy;
+    int mb_count, pb_frame, marker, mb_xy;
 
     marker = get_bits1(&s->gb);
 
@@ -282,7 +282,7 @@ static int rv10_decode_picture_header(MpegEncContext *s)
         s->mb_y = 0;
         mb_count = s->mb_width * s->mb_height;
     }
-    unk= get_bits(&s->gb, 3);   /* ignored */
+    skip_bits(&s->gb, 3);   /* ignored */
     s->f_code = 1;
     s->unrestricted_mv = 1;
 
diff --git a/libavcodec/s302m.c b/libavcodec/s302m.c
index fb1fd867d0..9d623efa8f 100644
--- a/libavcodec/s302m.c
+++ b/libavcodec/s302m.c
@@ -29,7 +29,7 @@ static int s302m_parse_frame_header(AVCodecContext *avctx, const uint8_t *buf,
                                     int buf_size)
 {
     uint32_t h;
-    int frame_size, channels, id, bits;
+    int frame_size, channels, bits;
 
     if (buf_size <= AES3_HEADER_LEN) {
         av_log(avctx, AV_LOG_ERROR, "frame is too short\n");
@@ -48,7 +48,6 @@ static int s302m_parse_frame_header(AVCodecContext *avctx, const uint8_t *buf,
     h = AV_RB32(buf);
     frame_size =  (h >> 16) & 0xffff;
     channels   = ((h >> 14) & 0x0003) * 2 +  2;
-    id         =  (h >>  6) & 0x00ff;
     bits       = ((h >>  4) & 0x0003) * 4 + 16;
 
     if (AES3_HEADER_LEN + frame_size != buf_size || bits > 24) {
diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index f19f0fe348..86e981f417 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -196,7 +196,6 @@ static int decode_wave_header(AVCodecContext *avctx, uint8_t *header, int header
 {
     GetBitContext hb;
     int len;
-    int chunk_size;
     short wave_format;
 
     init_get_bits(&hb, header, header_size*8);
@@ -205,7 +204,7 @@ static int decode_wave_header(AVCodecContext *avctx, uint8_t *header, int header
         return -1;
     }
 
-    chunk_size = get_le32(&hb);
+    skip_bits_long(&hb, 32);    /* chunk_size */
 
     if (get_le32(&hb) != MKTAG('W','A','V','E')) {
         av_log(avctx, AV_LOG_ERROR, "missing WAVE tag\n");
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index 3d01020a6d..6726c18ca9 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -38,15 +38,12 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
     int buf_size = avpkt->size;
     AVPacket avpkt_recoded;
     const int qscale = 5;
-    const uint8_t *buf_ptr;
     uint8_t *recoded;
     int i = 0, j = 0;
 
     if (!avctx->width || !avctx->height)
         return -1;
 
-    buf_ptr = buf;
-
     recoded = av_mallocz(buf_size + 1024);
     if (!recoded)
         return -1;
diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c
index 0ab09298fc..ef3b4be16c 100644
--- a/libavcodec/svq1dec.c
+++ b/libavcodec/svq1dec.c
@@ -554,9 +554,8 @@ static void svq1_parse_string (GetBitContext *bitbuf, uint8_t *out) {
 
 static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
   int frame_size_code;
-  int temporal_reference;
 
-  temporal_reference = get_bits (bitbuf, 8);
+  skip_bits(bitbuf, 8); /* temporal_reference */
 
   /* frame type */
   s->pict_type= get_bits (bitbuf, 2)+1;
diff --git a/libavcodec/targa.c b/libavcodec/targa.c
index 910cc1ba94..6a852a8528 100644
--- a/libavcodec/targa.c
+++ b/libavcodec/targa.c
@@ -108,18 +108,18 @@ static int decode_frame(AVCodecContext *avctx,
     AVFrame * const p= (AVFrame*)&s->picture;
     uint8_t *dst;
     int stride;
-    int idlen, pal, compr, x, y, w, h, bpp, flags;
+    int idlen, compr, y, w, h, bpp, flags;
     int first_clr, colors, csize;
 
     /* parse image header */
     CHECK_BUFFER_SIZE(buf, buf_end, 18, "header");
     idlen = *buf++;
-    pal = *buf++;
+    buf++; /* pal */
     compr = *buf++;
     first_clr = AV_RL16(buf); buf += 2;
     colors = AV_RL16(buf); buf += 2;
     csize = *buf++;
-    x = AV_RL16(buf); buf += 2;
+    buf += 2; /* x */
     y = AV_RL16(buf); buf += 2;
     w = AV_RL16(buf); buf += 2;
     h = AV_RL16(buf); buf += 2;
diff --git a/libavcodec/truemotion2.c b/libavcodec/truemotion2.c
index f9e607c4f7..23abade214 100644
--- a/libavcodec/truemotion2.c
+++ b/libavcodec/truemotion2.c
@@ -201,7 +201,6 @@ static inline int tm2_read_header(TM2Context *ctx, const uint8_t *buf)
 {
     uint32_t magic;
     const uint8_t *obuf;
-    int length;
 
     obuf = buf;
 
@@ -212,19 +211,6 @@ static inline int tm2_read_header(TM2Context *ctx, const uint8_t *buf)
 /*      av_log (ctx->avctx, AV_LOG_ERROR, "TM2 old header: not implemented (yet)\n"); */
         return 40;
     } else if(magic == 0x00000101) { /* new header */
-        int w, h, size, flags, xr, yr;
-
-        length = AV_RL32(buf);
-        buf += 4;
-
-        init_get_bits(&ctx->gb, buf, 32 * 8);
-        size = get_bits_long(&ctx->gb, 31);
-        h = get_bits(&ctx->gb, 15);
-        w = get_bits(&ctx->gb, 15);
-        flags = get_bits_long(&ctx->gb, 31);
-        yr = get_bits(&ctx->gb, 9);
-        xr = get_bits(&ctx->gb, 9);
-
         return 40;
     } else {
         av_log (ctx->avctx, AV_LOG_ERROR, "Not a TM2 header: 0x%08X\n", magic);
diff --git a/libavcodec/tscc.c b/libavcodec/tscc.c
index bd05f02a61..772906aba6 100644
--- a/libavcodec/tscc.c
+++ b/libavcodec/tscc.c
@@ -75,7 +75,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
     int buf_size = avpkt->size;
     CamtasiaContext * const c = avctx->priv_data;
     const unsigned char *encoded = buf;
-    unsigned char *outptr;
     int zret; // Zlib return code
     int len = buf_size;
 
@@ -89,8 +88,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
         return -1;
     }
 
-    outptr = c->pic.data[0]; // Output image pointer
-
     zret = inflateReset(&(c->zstream));
     if (zret != Z_OK) {
         av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret);
diff --git a/libavcodec/vmdav.c b/libavcodec/vmdav.c
index 710c2028cf..d258252d95 100644
--- a/libavcodec/vmdav.c
+++ b/libavcodec/vmdav.c
@@ -199,7 +199,6 @@ static void vmd_decode(VmdVideoContext *s)
 
     int frame_x, frame_y;
     int frame_width, frame_height;
-    int dp_size;
 
     frame_x = AV_RL16(&s->buf[6]);
     frame_y = AV_RL16(&s->buf[8]);
@@ -247,7 +246,6 @@ static void vmd_decode(VmdVideoContext *s)
         }
 
         dp = &s->frame.data[0][frame_y * s->frame.linesize[0] + frame_x];
-        dp_size = s->frame.linesize[0] * s->avctx->height;
         pp = &s->prev_frame.data[0][frame_y * s->prev_frame.linesize[0] + frame_x];
         switch (meth) {
         case 1:
diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c
index 271ab2dc96..e4f4860ea0 100644
--- a/libavcodec/zmbvenc.c
+++ b/libavcodec/zmbvenc.c
@@ -181,7 +181,7 @@ static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void
         int x, y, bh2, bw2, xored;
         uint8_t *tsrc, *tprev;
         uint8_t *mv;
-        int mx, my, bv;
+        int mx, my;
 
         bw = (avctx->width + ZMBV_BLOCK - 1) / ZMBV_BLOCK;
         bh = (avctx->height + ZMBV_BLOCK - 1) / ZMBV_BLOCK;
@@ -197,7 +197,7 @@ static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void
                 tsrc = src + x;
                 tprev = prev + x;
 
-                bv = zmbv_me(c, tsrc, p->linesize[0], tprev, c->pstride, x, y, &mx, &my, &xored);
+                zmbv_me(c, tsrc, p->linesize[0], tprev, c->pstride, x, y, &mx, &my, &xored);
                 mv[0] = (mx << 1) | !!xored;
                 mv[1] = my << 1;
                 tprev += mx + my * c->pstride;
diff --git a/libavfilter/vf_fieldorder.c b/libavfilter/vf_fieldorder.c
index b55640bbc2..444dffb52c 100644
--- a/libavfilter/vf_fieldorder.c
+++ b/libavfilter/vf_fieldorder.c
@@ -153,7 +153,7 @@ static void end_frame(AVFilterLink *inlink)
     AVFilterBufferRef *inpicref   = inlink->cur_buf;
     AVFilterBufferRef *outpicref  = outlink->out_buf;
 
-    int               h, w, plane, line_step, line_size, line;
+    int               h, plane, line_step, line_size, line;
     uint8_t           *cpy_src, *cpy_dst;
 
     if (    inpicref->video->interlaced
@@ -162,7 +162,6 @@ static void end_frame(AVFilterLink *inlink)
                 "picture will move %s one line\n",
                 fieldorder->dst_tff ? "up" : "down");
         h = inpicref->video->h;
-        w = inpicref->video->w;
         for (plane = 0; plane < 4 && inpicref->data[plane]; plane++) {
             line_step = inpicref->linesize[plane];
             line_size = fieldorder->line_size[plane];
diff --git a/libavformat/4xm.c b/libavformat/4xm.c
index ff0baaec8f..93c90e8cbc 100644
--- a/libavformat/4xm.c
+++ b/libavformat/4xm.c
@@ -246,7 +246,7 @@ static int fourxm_read_packet(AVFormatContext *s,
     FourxmDemuxContext *fourxm = s->priv_data;
     AVIOContext *pb = s->pb;
     unsigned int fourcc_tag;
-    unsigned int size, out_size;
+    unsigned int size;
     int ret = 0;
     unsigned int track_number;
     int packet_read = 0;
@@ -295,7 +295,7 @@ static int fourxm_read_packet(AVFormatContext *s,
 
         case snd__TAG:
             track_number = avio_rl32(pb);
-            out_size= avio_rl32(pb);
+            avio_skip(pb, 4);
             size-=8;
 
             if (track_number < fourxm->track_count && fourxm->tracks[track_number].channels>0) {
diff --git a/libavformat/apetag.c b/libavformat/apetag.c
index b8a460a599..6e925354af 100644
--- a/libavformat/apetag.c
+++ b/libavformat/apetag.c
@@ -35,11 +35,11 @@ static int ape_tag_read_field(AVFormatContext *s)
 {
     AVIOContext *pb = s->pb;
     uint8_t key[1024], *value;
-    uint32_t size, flags;
+    uint32_t size;
     int i, c;
 
     size = avio_rl32(pb);  /* field size */
-    flags = avio_rl32(pb); /* field flags */
+    avio_skip(pb, 4);      /* field flags */
     for (i = 0; i < sizeof(key) - 1; i++) {
         c = avio_r8(pb);
         if (c < 0x20 || c > 0x7E)
diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index 30642a61fa..789df1274d 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -216,7 +216,6 @@ static int asf_read_stream_properties(AVFormatContext *s, int64_t size)
     ff_asf_guid g;
     enum AVMediaType type;
     int type_specific_size, sizeX;
-    uint64_t total_size;
     unsigned int tag1;
     int64_t pos1, pos2, start_time;
     int test_for_ext_stream_audio, is_dvr_ms_audio=0;
@@ -264,7 +263,7 @@ static int asf_read_stream_properties(AVFormatContext *s, int64_t size)
         return -1;
     }
     ff_get_guid(pb, &g);
-    total_size = avio_rl64(pb);
+    avio_skip(pb, 8); /* total_size */
     type_specific_size = avio_rl32(pb);
     avio_rl32(pb);
     st->id = avio_rl16(pb) & 0x7f; /* stream id */
@@ -401,7 +400,7 @@ static int asf_read_ext_stream_properties(AVFormatContext *s, int64_t size)
     AVIOContext *pb = s->pb;
     ff_asf_guid g;
     int ext_len, payload_ext_ct, stream_ct, i;
-    uint32_t ext_d, leak_rate, stream_num;
+    uint32_t leak_rate, stream_num;
     unsigned int stream_languageid_index;
 
     avio_rl64(pb); // starttime
@@ -435,7 +434,7 @@ static int asf_read_ext_stream_properties(AVFormatContext *s, int64_t size)
 
     for (i=0; i<payload_ext_ct; i++){
         ff_get_guid(pb, &g);
-        ext_d=avio_rl16(pb);
+        avio_skip(pb, 2);
         ext_len=avio_rl32(pb);
         avio_skip(pb, ext_len);
     }
@@ -519,7 +518,7 @@ static int asf_read_metadata(AVFormatContext *s, int64_t size)
 {
     AVIOContext *pb = s->pb;
     ASFContext *asf = s->priv_data;
-    int n, stream_num, name_len, value_len, value_type, value_num;
+    int n, stream_num, name_len, value_len, value_num;
     int ret, i;
     n = avio_rl16(pb);
 
@@ -529,7 +528,7 @@ static int asf_read_metadata(AVFormatContext *s, int64_t size)
         avio_rl16(pb); //lang_list_index
         stream_num= avio_rl16(pb);
         name_len=   avio_rl16(pb);
-        value_type= avio_rl16(pb);
+        avio_skip(pb, 2); /* value_type */
         value_len=  avio_rl32(pb);
 
         if ((ret = avio_get_str16le(pb, name_len, name, sizeof(name))) < name_len)
@@ -634,10 +633,8 @@ static int asf_read_header(AVFormatContext *s, AVFormatParameters *ap)
             // if so the next iteration will pick it up
             continue;
         } else if (!ff_guidcmp(&g, &ff_asf_head1_guid)) {
-            int v1, v2;
             ff_get_guid(pb, &g);
-            v1 = avio_rl32(pb);
-            v2 = avio_rl16(pb);
+            avio_skip(pb, 6);
             continue;
         } else if (!ff_guidcmp(&g, &ff_asf_marker_header)) {
             asf_read_marker(s, gsize);
@@ -807,7 +804,7 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb){
     ASFContext *asf = s->priv_data;
     int rsize = 1;
     int num = avio_r8(pb);
-    int64_t ts0, ts1;
+    int64_t ts0;
 
     asf->packet_segments--;
     asf->packet_key_frame = num >> 7;
@@ -830,7 +827,7 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb){
 //            av_log(s, AV_LOG_DEBUG, "\n");
             avio_skip(pb, 10);
             ts0= avio_rl64(pb);
-            ts1= avio_rl64(pb);
+            avio_skip(pb, 8);;
             avio_skip(pb, 12);
             avio_rl32(pb);
             avio_skip(pb, asf->packet_replic_size - 8 - 38 - 4);
diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index a9ff688a86..ff270edae5 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -337,7 +337,7 @@ static int avi_read_header(AVFormatContext *s, AVFormatParameters *ap)
     AVIContext *avi = s->priv_data;
     AVIOContext *pb = s->pb;
     unsigned int tag, tag1, handler;
-    int codec_type, stream_index, frame_period, bit_rate;
+    int codec_type, stream_index, frame_period;
     unsigned int size;
     int i;
     AVStream *st;
@@ -407,7 +407,7 @@ static int avi_read_header(AVFormatContext *s, AVFormatParameters *ap)
             /* AVI header */
             /* using frame_period is bad idea */
             frame_period = avio_rl32(pb);
-            bit_rate = avio_rl32(pb) * 8;
+            avio_skip(pb, 4);
             avio_rl32(pb);
             avi->non_interleaved |= avio_rl32(pb) & AVIF_MUSTUSEINDEX;
 
diff --git a/libavformat/gif.c b/libavformat/gif.c
index dfd2ec16e8..55deb4d207 100644
--- a/libavformat/gif.c
+++ b/libavformat/gif.c
@@ -295,9 +295,7 @@ static int gif_write_video(AVFormatContext *s,
                            AVCodecContext *enc, const uint8_t *buf, int size)
 {
     AVIOContext *pb = s->pb;
-    GIFContext *gif = s->priv_data;
     int jiffies;
-    int64_t delay;
 
     /* graphic control extension block */
     avio_w8(pb, 0x21);
@@ -307,8 +305,6 @@ static int gif_write_video(AVFormatContext *s,
 
     /* 1 jiffy is 1/70 s */
     /* the delay_time field indicates the number of jiffies - 1 */
-    delay = gif->file_time - gif->time;
-
     /* XXX: should use delay, in order to be more accurate */
     /* instead of using the same rounded value each time */
     /* XXX: don't even remember if I really use it for now */
diff --git a/libavformat/mmf.c b/libavformat/mmf.c
index ec99c2d0dd..3848d5cf56 100644
--- a/libavformat/mmf.c
+++ b/libavformat/mmf.c
@@ -186,13 +186,13 @@ static int mmf_read_header(AVFormatContext *s,
     unsigned int tag;
     AVIOContext *pb = s->pb;
     AVStream *st;
-    int64_t file_size, size;
+    int64_t size;
     int rate, params;
 
     tag = avio_rl32(pb);
     if (tag != MKTAG('M', 'M', 'M', 'D'))
         return -1;
-    file_size = avio_rb32(pb);
+    avio_skip(pb, 4); /* file_size */
 
     /* Skip some unused chunks that may or may not be present */
     for(;; avio_skip(pb, size)) {
@@ -263,12 +263,10 @@ static int mmf_read_packet(AVFormatContext *s,
                            AVPacket *pkt)
 {
     MMFContext *mmf = s->priv_data;
-    AVStream *st;
     int ret, size;
 
     if (s->pb->eof_reached)
         return AVERROR(EIO);
-    st = s->streams[0];
 
     size = MAX_SIZE;
     if(size > mmf->data_size)
diff --git a/libavformat/mov.c b/libavformat/mov.c
index cede1f751a..8ec0d19ed2 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -464,21 +464,21 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 int ff_mov_read_esds(AVFormatContext *fc, AVIOContext *pb, MOVAtom atom)
 {
     AVStream *st;
-    int tag, len;
+    int tag;
 
     if (fc->nb_streams < 1)
         return 0;
     st = fc->streams[fc->nb_streams-1];
 
     avio_rb32(pb); /* version + flags */
-    len = ff_mp4_read_descr(fc, pb, &tag);
+    ff_mp4_read_descr(fc, pb, &tag);
     if (tag == MP4ESDescrTag) {
         avio_rb16(pb); /* ID */
         avio_r8(pb); /* priority */
     } else
         avio_rb16(pb); /* ID */
 
-    len = ff_mp4_read_descr(fc, pb, &tag);
+    ff_mp4_read_descr(fc, pb, &tag);
     if (tag == MP4DecConfigDescrTag)
         ff_mp4_read_dec_config_descr(fc, st, pb);
     return 0;
diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index 67678059a1..9562ea9ec0 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -196,8 +196,6 @@ static int ogg_read_page(AVFormatContext *s, int *str)
     int flags, nsegs;
     uint64_t gp;
     uint32_t serial;
-    uint32_t seq;
-    uint32_t crc;
     int size, idx;
     uint8_t sync[4];
     int sp = 0;
@@ -230,8 +228,7 @@ static int ogg_read_page(AVFormatContext *s, int *str)
     flags = avio_r8(bc);
     gp = avio_rl64 (bc);
     serial = avio_rl32 (bc);
-    seq = avio_rl32 (bc);
-    crc = avio_rl32 (bc);
+    avio_skip(bc, 8); /* seq, crc */
     nsegs = avio_r8(bc);
 
     idx = ogg_find_stream (ogg, serial);
diff --git a/libavformat/oggparseogm.c b/libavformat/oggparseogm.c
index dda5be601a..9526c3acc1 100644
--- a/libavformat/oggparseogm.c
+++ b/libavformat/oggparseogm.c
@@ -39,7 +39,6 @@ ogm_header(AVFormatContext *s, int idx)
     const uint8_t *p = os->buf + os->pstart;
     uint64_t time_unit;
     uint64_t spu;
-    uint32_t default_len;
 
     if(!(*p & 1))
         return 0;
@@ -74,8 +73,7 @@ ogm_header(AVFormatContext *s, int idx)
 
         time_unit   = bytestream_get_le64(&p);
         spu         = bytestream_get_le64(&p);
-        default_len = bytestream_get_le32(&p);
-
+        p += 4;                     /* default_len */
         p += 8;                     /* buffersize + bits_per_sample */
 
         if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO){
diff --git a/libavformat/rl2.c b/libavformat/rl2.c
index f2d68384b8..b4172c284a 100644
--- a/libavformat/rl2.c
+++ b/libavformat/rl2.c
@@ -80,8 +80,6 @@ static av_cold int rl2_read_header(AVFormatContext *s,
     unsigned int audio_frame_counter = 0;
     unsigned int video_frame_counter = 0;
     unsigned int back_size;
-    int data_size;
-    unsigned short encoding_method;
     unsigned short sound_rate;
     unsigned short rate;
     unsigned short channels;
@@ -98,14 +96,14 @@ static av_cold int rl2_read_header(AVFormatContext *s,
     avio_skip(pb,4);          /* skip FORM tag */
     back_size = avio_rl32(pb); /**< get size of the background frame */
     signature = avio_rb32(pb);
-    data_size = avio_rb32(pb);
+    avio_skip(pb, 4);         /* data size */
     frame_count = avio_rl32(pb);
 
     /* disallow back_sizes and frame_counts that may lead to overflows later */
     if(back_size > INT_MAX/2  || frame_count > INT_MAX / sizeof(uint32_t))
         return AVERROR_INVALIDDATA;
 
-    encoding_method = avio_rl16(pb);
+    avio_skip(pb, 2);         /* encoding method */
     sound_rate = avio_rl16(pb);
     rate = avio_rl16(pb);
     channels = avio_rl16(pb);
diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
index 843706dec3..d6a5251c05 100644
--- a/libavformat/rmdec.c
+++ b/libavformat/rmdec.c
@@ -280,7 +280,7 @@ ff_rm_read_mdpr_codecdata (AVFormatContext *s, AVIOContext *pb,
         if (rm_read_audio_stream_info(s, pb, st, rst, 0))
             return -1;
     } else {
-        int fps, fps2;
+        int fps;
         if (avio_rl32(pb) != MKTAG('V', 'I', 'D', 'O')) {
         fail1:
             av_log(st->codec, AV_LOG_ERROR, "Unsupported video codec\n");
@@ -298,7 +298,7 @@ ff_rm_read_mdpr_codecdata (AVFormatContext *s, AVIOContext *pb,
         fps= avio_rb16(pb);
         st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
         avio_rb32(pb);
-        fps2= avio_rb16(pb);
+        avio_skip(pb, 2);
         avio_rb16(pb);
 
         if ((ret = rm_read_extradata(pb, st->codec, codec_data_size - (avio_tell(pb) - codec_pos))) < 0)
diff --git a/libavformat/rpl.c b/libavformat/rpl.c
index 1ca4a86d1b..935b81d7d8 100644
--- a/libavformat/rpl.c
+++ b/libavformat/rpl.c
@@ -299,9 +299,9 @@ static int rpl_read_packet(AVFormatContext *s, AVPacket *pkt)
         stream->codec->codec_tag == 124) {
         // We have to split Escape 124 frames because there are
         // multiple frames per chunk in Escape 124 samples.
-        uint32_t frame_size, frame_flags;
+        uint32_t frame_size;
 
-        frame_flags = avio_rl32(pb);
+        avio_skip(pb, 4); /* flags */
         frame_size = avio_rl32(pb);
         if (avio_seek(pb, -8, SEEK_CUR) < 0)
             return AVERROR(EIO);
diff --git a/libavformat/rtpdec_latm.c b/libavformat/rtpdec_latm.c
index bde34b7ab9..42317a9529 100644
--- a/libavformat/rtpdec_latm.c
+++ b/libavformat/rtpdec_latm.c
@@ -108,8 +108,7 @@ static int parse_fmtp_config(AVStream *st, char *value)
     int len = ff_hex_to_data(NULL, value), i, ret = 0;
     GetBitContext gb;
     uint8_t *config;
-    int audio_mux_version, same_time_framing, num_sub_frames,
-        num_programs, num_layers;
+    int audio_mux_version, same_time_framing, num_programs, num_layers;
 
     /* Pad this buffer, too, to avoid out of bounds reads with get_bits below */
     config = av_mallocz(len + FF_INPUT_BUFFER_PADDING_SIZE);
@@ -119,7 +118,7 @@ static int parse_fmtp_config(AVStream *st, char *value)
     init_get_bits(&gb, config, len*8);
     audio_mux_version = get_bits(&gb, 1);
     same_time_framing = get_bits(&gb, 1);
-    num_sub_frames    = get_bits(&gb, 6);
+    skip_bits(&gb, 6); /* num_sub_frames */
     num_programs      = get_bits(&gb, 4);
     num_layers        = get_bits(&gb, 3);
     if (audio_mux_version != 0 || same_time_framing != 1 || num_programs != 0 ||
diff --git a/libavformat/sauce.c b/libavformat/sauce.c
index cf33ab7be7..f9ca17ac30 100644
--- a/libavformat/sauce.c
+++ b/libavformat/sauce.c
@@ -32,7 +32,7 @@ int ff_sauce_read(AVFormatContext *avctx, uint64_t *fsize, int *got_width, int g
 {
     AVIOContext *pb = avctx->pb;
     char buf[36];
-    int datatype, filetype, t1, t2, nb_comments, flags;
+    int datatype, filetype, t1, t2, nb_comments;
     uint64_t start_pos = avio_size(pb) - 128;
 
     avio_seek(pb, start_pos, SEEK_SET);
@@ -57,7 +57,7 @@ int ff_sauce_read(AVFormatContext *avctx, uint64_t *fsize, int *got_width, int g
     t1          = avio_rl16(pb);
     t2          = avio_rl16(pb);
     nb_comments = avio_r8(pb);
-    flags       = avio_r8(pb);
+    avio_skip(pb, 1); /* flags */
     avio_skip(pb, 4);
     GET_SAUCE_META("encoder",   22);
 
diff --git a/libavformat/smacker.c b/libavformat/smacker.c
index 01d1303ae3..db9a02bb6c 100644
--- a/libavformat/smacker.c
+++ b/libavformat/smacker.c
@@ -233,7 +233,6 @@ static int smacker_read_packet(AVFormatContext *s, AVPacket *pkt)
     int i;
     int frame_size = 0;
     int palchange = 0;
-    int pos;
 
     if (s->pb->eof_reached || smk->cur_frame >= smk->frames)
         return AVERROR_EOF;
@@ -244,7 +243,6 @@ static int smacker_read_packet(AVFormatContext *s, AVPacket *pkt)
         frame_size = smk->frm_size[smk->cur_frame] & (~3);
         flags = smk->frm_flags[smk->cur_frame];
         /* handle palette change event */
-        pos = avio_tell(s->pb);
         if(flags & SMACKER_PAL){
             int size, sz, t, off, j, pos;
             uint8_t *pal = smk->pal;
diff --git a/libavformat/sol.c b/libavformat/sol.c
index 1c9ce69538..1ebb4d2e30 100644
--- a/libavformat/sol.c
+++ b/libavformat/sol.c
@@ -85,7 +85,6 @@ static int sol_channels(int magic, int type)
 static int sol_read_header(AVFormatContext *s,
                           AVFormatParameters *ap)
 {
-    int size;
     unsigned int magic,tag;
     AVIOContext *pb = s->pb;
     unsigned int id, channels, rate, type;
@@ -99,7 +98,7 @@ static int sol_read_header(AVFormatContext *s,
         return -1;
     rate = avio_rl16(pb);
     type = avio_r8(pb);
-    size = avio_rl32(pb);
+    avio_skip(pb, 4); /* size */
     if (magic != 0x0B8D)
         avio_r8(pb); /* newer SOLs contain padding byte */
 
diff --git a/libavformat/yuv4mpeg.c b/libavformat/yuv4mpeg.c
index d712321162..9a6a0c8315 100644
--- a/libavformat/yuv4mpeg.c
+++ b/libavformat/yuv4mpeg.c
@@ -94,7 +94,7 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt)
     AVPicture *picture;
     int* first_pkt = s->priv_data;
     int width, height, h_chroma_shift, v_chroma_shift;
-    int i, m;
+    int i;
     char buf2[Y4M_LINE_MAX+1];
     char buf1[20];
     uint8_t *ptr, *ptr1, *ptr2;
@@ -114,7 +114,7 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt)
 
     /* construct frame header */
 
-    m = snprintf(buf1, sizeof(buf1), "%s\n", Y4M_FRAME_MAGIC);
+    snprintf(buf1, sizeof(buf1), "%s\n", Y4M_FRAME_MAGIC);
     avio_write(pb, buf1, strlen(buf1));
 
     width = st->codec->width;

From 14086341bfd2f8f5a5dfbb23868bf8cf677862ef Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 1 Jun 2011 17:29:22 +0100
Subject: [PATCH 517/830] nutdec: remove unused variable

---
 libavformat/nutdec.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/libavformat/nutdec.c b/libavformat/nutdec.c
index 0a1ed554e3..d8278175a0 100644
--- a/libavformat/nutdec.c
+++ b/libavformat/nutdec.c
@@ -190,7 +190,6 @@ static int decode_main_header(NUTContext *nut){
     uint64_t tmp, end;
     unsigned int stream_count;
     int i, j, tmp_stream, tmp_mul, tmp_pts, tmp_size, count, tmp_res, tmp_head_idx;
-    int64_t tmp_match;
 
     end= get_packetheader(nut, bc, 1, MAIN_STARTCODE);
     end += avio_tell(bc);
@@ -218,7 +217,6 @@ static int decode_main_header(NUTContext *nut){
     tmp_pts=0;
     tmp_mul=1;
     tmp_stream=0;
-    tmp_match= 1-(1LL<<62);
     tmp_head_idx= 0;
     for(i=0; i<256;){
         int tmp_flags = ffio_read_varlen(bc);
@@ -232,7 +230,7 @@ static int decode_main_header(NUTContext *nut){
         else             tmp_res   = 0;
         if(tmp_fields>5) count     = ffio_read_varlen(bc);
         else             count     = tmp_mul - tmp_size;
-        if(tmp_fields>6) tmp_match = get_s(bc);
+        if(tmp_fields>6) get_s(bc);
         if(tmp_fields>7) tmp_head_idx= ffio_read_varlen(bc);
 
         while(tmp_fields-- > 8)

From 4c1e56ede86b68b5c600bde4ebeea6153c46f7f9 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 1 Jun 2011 17:30:28 +0100
Subject: [PATCH 518/830] oma: check avio_read() return value

---
 libavformat/oma.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/oma.c b/libavformat/oma.c
index 6dd56cf702..6fdf75f17c 100644
--- a/libavformat/oma.c
+++ b/libavformat/oma.c
@@ -80,6 +80,8 @@ static int oma_read_header(AVFormatContext *s,
 
     ff_id3v2_read(s, ID3v2_EA3_MAGIC);
     ret = avio_read(s->pb, buf, EA3_HEADER_SIZE);
+    if (ret < EA3_HEADER_SIZE)
+        return -1;
 
     if (memcmp(buf, ((const uint8_t[]){'E', 'A', '3'}),3) || buf[4] != 0 || buf[5] != EA3_HEADER_SIZE) {
         av_log(s, AV_LOG_ERROR, "Couldn't find the EA3 header !\n");

From 3e3db4d93d71d25927c7c68bb92f9cc8f2003396 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 21:16:47 +0200
Subject: [PATCH 519/830] bktr: add a framerate private option.

---
 libavdevice/bktr.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index e8ff557b73..8e3b271983 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -54,11 +54,10 @@ typedef struct {
     int video_fd;
     int tuner_fd;
     int width, height;
-    int frame_rate;
-    int frame_rate_base;
     uint64_t per_frame;
     int standard;
     char *video_size; /**< String describing video size, set by a private option. */
+    char *framerate;  /**< Set by a private option. */
 } VideoData;
 
 
@@ -249,8 +248,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     VideoData *s = s1->priv_data;
     AVStream *st;
     int width, height;
-    int frame_rate;
-    int frame_rate_base;
+    AVRational fps;
     int ret = 0;
 
     if (ap->time_base.den <= 0) {
@@ -262,14 +260,18 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
         goto out;
     }
+    if ((ret = av_parse_video_rate(&fps, s->framerate)) < 0) {
+        av_log(s1, AV_LOG_ERROR, "Couldn't parse framerate.\n");
+        goto out;
+    }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->width > 0)
         width = ap->width;
     if (ap->height > 0)
         height = ap->height;
+    if (ap->time_base.num)
+        fps = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
-    frame_rate = ap->time_base.den;
-    frame_rate_base = ap->time_base.num;
 
     st = av_new_stream(s1, 0);
     if (!st) {
@@ -280,17 +282,15 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 
     s->width = width;
     s->height = height;
-    s->frame_rate = frame_rate;
-    s->frame_rate_base = frame_rate_base;
-    s->per_frame = ((uint64_t)1000000 * s->frame_rate_base) / s->frame_rate;
+    s->per_frame = ((uint64_t)1000000 * fps.den) / fps.num;
 
     st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
     st->codec->pix_fmt = PIX_FMT_YUV420P;
     st->codec->codec_id = CODEC_ID_RAWVIDEO;
     st->codec->width = width;
     st->codec->height = height;
-    st->codec->time_base.den = frame_rate;
-    st->codec->time_base.num = frame_rate_base;
+    st->codec->time_base.den = fps.num;
+    st->codec->time_base.num = fps.den;
 
 #if FF_API_FORMAT_PARAMETERS
     if (ap->standard) {
@@ -314,6 +314,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 
 out:
     av_freep(&s->video_size);
+    av_freep(&s->framerate);
     return ret;
 }
 
@@ -346,6 +347,7 @@ static const AVOption options[] = {
     { "PALM",     "", 0, FF_OPT_TYPE_CONST, {.dbl = PALM},  0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
     { "NTSCJ",    "", 0, FF_OPT_TYPE_CONST, {.dbl = NTSCJ}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "vga"}, 0, 0, DEC },
+    { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "ntsc"}, 0, 0, DEC },
     { NULL },
 };
 

From 1556186a625fc61bcd06a6a009795f3c895459ea Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 21:16:47 +0200
Subject: [PATCH 520/830] fbdev: add a framerate private option.

---
 libavdevice/fbdev.c | 48 +++++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/libavdevice/fbdev.c b/libavdevice/fbdev.c
index 6b03be4751..58b3ab4572 100644
--- a/libavdevice/fbdev.c
+++ b/libavdevice/fbdev.c
@@ -37,7 +37,10 @@
 #include <time.h>
 #include <linux/fb.h>
 
+#include "libavutil/log.h"
 #include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
 #include "libavutil/pixdesc.h"
 #include "libavformat/avformat.h"
 
@@ -74,8 +77,10 @@ static enum PixelFormat get_pixfmt_from_fb_varinfo(struct fb_var_screeninfo *var
 }
 
 typedef struct {
+    AVClass *class;          ///< class for private options
     int frame_size;          ///< size in bytes of a grabbed frame
-    AVRational time_base;    ///< time base
+    AVRational fps;          ///< framerate
+    char *framerate;         ///< framerate string set by a private option
     int64_t time_frame;      ///< time for the next frame to output (in 1/1000000 units)
 
     int fd;                  ///< framebuffer device file descriptor
@@ -97,16 +102,21 @@ av_cold static int fbdev_read_header(AVFormatContext *avctx,
     enum PixelFormat pix_fmt;
     int ret, flags = O_RDONLY;
 
+    ret = av_parse_video_rate(&fbdev->fps, fbdev->framerate);
+    av_freep(&fbdev->framerate);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Couldn't parse framerate.\n");
+        return ret;
+    }
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->time_base.num)
+        fbdev->fps = (AVRational){ap->time_base.den, ap->time_base.num};
+#endif
+
     if (!(st = av_new_stream(avctx, 0)))
         return AVERROR(ENOMEM);
     av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in microseconds */
 
-    if (ap->time_base.den <= 0) {
-        av_log(avctx, AV_LOG_ERROR, "Invalid time base %d/%d\n",
-               ap->time_base.num, ap->time_base.den);
-        return AVERROR(EINVAL);
-    }
-
     /* NONBLOCK is ignored by the fbdev driver, only set for consistency */
     if (avctx->flags & AVFMT_FLAG_NONBLOCK)
         flags |= O_NONBLOCK;
@@ -146,7 +156,6 @@ av_cold static int fbdev_read_header(AVFormatContext *avctx,
     fbdev->bytes_per_pixel = (fbdev->varinfo.bits_per_pixel + 7) >> 3;
     fbdev->frame_linesize  = fbdev->width * fbdev->bytes_per_pixel;
     fbdev->frame_size      = fbdev->frame_linesize * fbdev->heigth;
-    fbdev->time_base       = ap->time_base;
     fbdev->time_frame      = AV_NOPTS_VALUE;
     fbdev->data = mmap(NULL, fbdev->fixinfo.smem_len, PROT_READ, MAP_SHARED, fbdev->fd, 0);
     if (fbdev->data == MAP_FAILED) {
@@ -162,13 +171,13 @@ av_cold static int fbdev_read_header(AVFormatContext *avctx,
     st->codec->pix_fmt    = pix_fmt;
     st->codec->time_base  = ap->time_base;
     st->codec->bit_rate   =
-        fbdev->width * fbdev->heigth * fbdev->bytes_per_pixel / av_q2d(ap->time_base) * 8;
+        fbdev->width * fbdev->heigth * fbdev->bytes_per_pixel * av_q2d(fbdev->fps) * 8;
 
     av_log(avctx, AV_LOG_INFO,
-           "w:%d h:%d bpp:%d pixfmt:%s tb:%d/%d bit_rate:%d\n",
+           "w:%d h:%d bpp:%d pixfmt:%s fps:%d/%d bit_rate:%d\n",
            fbdev->width, fbdev->heigth, fbdev->varinfo.bits_per_pixel,
            av_pix_fmt_descriptors[pix_fmt].name,
-           ap->time_base.num, ap->time_base.den,
+           fbdev->fps.num, fbdev->fps.den,
            st->codec->bit_rate);
     return 0;
 
@@ -202,7 +211,7 @@ static int fbdev_read_packet(AVFormatContext *avctx, AVPacket *pkt)
         while (nanosleep(&ts, &ts) < 0 && errno == EINTR);
     }
     /* compute the time of the next frame */
-    fbdev->time_frame += INT64_C(1000000) * av_q2d(fbdev->time_base);
+    fbdev->time_frame += INT64_C(1000000) / av_q2d(fbdev->fps);
 
     if ((ret = av_new_packet(pkt, fbdev->frame_size)) < 0)
         return ret;
@@ -239,6 +248,20 @@ av_cold static int fbdev_read_close(AVFormatContext *avctx)
     return 0;
 }
 
+#define OFFSET(x) offsetof(FBDevContext, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+    { "framerate","", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "25"}, 0, 0, DEC },
+    { NULL },
+};
+
+static const AVClass fbdev_class = {
+    .class_name = "fbdev indev",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_fbdev_demuxer = {
     .name           = "fbdev",
     .long_name      = NULL_IF_CONFIG_SMALL("Linux framebuffer"),
@@ -247,4 +270,5 @@ AVInputFormat ff_fbdev_demuxer = {
     .read_packet    = fbdev_read_packet,
     .read_close     = fbdev_read_close,
     .flags          = AVFMT_NOFILE,
+    .priv_class     = &fbdev_class,
 };

From 121ef2e26d6fefb6b48fbc1082d351bd4d538111 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 21:16:47 +0200
Subject: [PATCH 521/830] libdc1394: add a framerate private option.

---
 libavdevice/libdc1394.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index f4340f570c..2811e3b97d 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -60,9 +60,10 @@ typedef struct dc1394_data {
     dc1394video_frame_t *frame;
 #endif
     int current_frame;
-    int fps;
+    int  frame_rate;        /**< frames per 1000 seconds (fps * 1000) */
     char *video_size;       /**< String describing video size, set by a private option. */
     char *pixel_format;     /**< Set by a private option. */
+    char *framerate;        /**< Set by a private option. */
 
     AVPacket packet;
 } dc1394_data;
@@ -102,6 +103,7 @@ static const AVOption options[] = {
 #endif
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "qvga"}, 0, 0, DEC },
     { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = "uyvy422"}, 0, 0, DEC },
+    { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "ntsc"}, 0, 0, DEC },
     { NULL },
 };
 
@@ -122,7 +124,7 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     struct dc1394_frame_rate *fps;
     enum PixelFormat pix_fmt;
     int width, height;
-    int frame_rate           = !ap->time_base.num ? 30000 : av_rescale(1000, ap->time_base.den, ap->time_base.num);
+    AVRational framerate;
     int ret = 0;
 
     if ((pix_fmt = av_get_pix_fmt(dc1394->pixel_format)) == PIX_FMT_NONE) {
@@ -135,6 +137,10 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
         av_log(c, AV_LOG_ERROR, "Couldn't parse video size.\n");
         goto out;
     }
+    if ((ret = av_parse_video_rate(&framerate, dc1394->framerate)) < 0) {
+        av_log(c, AV_LOG_ERROR, "Couldn't parse framerate.\n");
+        goto out;
+    }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->width > 0)
         width = ap->width;
@@ -142,19 +148,22 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
         height = ap->height;
     if (ap->pix_fmt)
         pix_fmt = ap->pix_fmt;
+    if (ap->time_base.num)
+        framerate = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
+    dc1394->frame_rate = av_rescale(1000, framerate.num, framerate.den);
 
     for (fmt = dc1394_frame_formats; fmt->width; fmt++)
          if (fmt->pix_fmt == pix_fmt && fmt->width == width && fmt->height == height)
              break;
 
     for (fps = dc1394_frame_rates; fps->frame_rate; fps++)
-         if (fps->frame_rate == frame_rate)
+         if (fps->frame_rate == dc1394->frame_rate)
              break;
 
     if (!fps->frame_rate || !fmt->width) {
         av_log(c, AV_LOG_ERROR, "Can't find matching camera format for %s, %dx%d@%d:1000fps\n", avcodec_get_pix_fmt_name(pix_fmt),
-                                                                                                width, height, frame_rate);
+                                                                                                width, height, dc1394->frame_rate);
         ret = AVERROR(EINVAL);
         goto out;
     }
@@ -168,8 +177,8 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     av_set_pts_info(vst, 64, 1, 1000);
     vst->codec->codec_type = AVMEDIA_TYPE_VIDEO;
     vst->codec->codec_id = CODEC_ID_RAWVIDEO;
-    vst->codec->time_base.den = fps->frame_rate;
-    vst->codec->time_base.num = 1000;
+    vst->codec->time_base.den = framerate.num;
+    vst->codec->time_base.num = framerate.den;
     vst->codec->width = fmt->width;
     vst->codec->height = fmt->height;
     vst->codec->pix_fmt = fmt->pix_fmt;
@@ -181,7 +190,6 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     dc1394->packet.flags |= AV_PKT_FLAG_KEY;
 
     dc1394->current_frame = 0;
-    dc1394->fps = fps->frame_rate;
 
     vst->codec->bit_rate = av_rescale(dc1394->packet.size * 8, fps->frame_rate, 1000);
     *select_fps = fps;
@@ -189,6 +197,7 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
 out:
     av_freep(&dc1394->video_size);
     av_freep(&dc1394->pixel_format);
+    av_freep(&dc1394->framerate);
     return ret;
 }
 
@@ -267,7 +276,7 @@ static int dc1394_v1_read_packet(AVFormatContext *c, AVPacket *pkt)
 
     if (res == DC1394_SUCCESS) {
         dc1394->packet.data = (uint8_t *)(dc1394->camera.capture_buffer);
-        dc1394->packet.pts = (dc1394->current_frame * 1000000) / dc1394->fps;
+        dc1394->packet.pts = (dc1394->current_frame * 1000000) / dc1394->frame_rate;
         res = dc1394->packet.size;
     } else {
         av_log(c, AV_LOG_ERROR, "DMA capture failed\n");
@@ -379,7 +388,7 @@ static int dc1394_v2_read_packet(AVFormatContext *c, AVPacket *pkt)
     res = dc1394_capture_dequeue(dc1394->camera, DC1394_CAPTURE_POLICY_WAIT, &dc1394->frame);
     if (res == DC1394_SUCCESS) {
         dc1394->packet.data = (uint8_t *)(dc1394->frame->image);
-        dc1394->packet.pts = (dc1394->current_frame  * 1000000) / (dc1394->fps);
+        dc1394->packet.pts = (dc1394->current_frame  * 1000000) / (dc1394->frame_rate);
         res = dc1394->frame->image_bytes;
     } else {
         av_log(c, AV_LOG_ERROR, "DMA capture failed\n");

From c21324eeb6f3175ad2304cb77e990f73a1e810b2 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 21:16:47 +0200
Subject: [PATCH 522/830] v4l2: add a framerate private option.

---
 libavdevice/v4l2.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 80b640078d..d9dce33b9e 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -73,6 +73,7 @@ struct video_data {
     int channel;
     char *video_size; /**< String describing video size, set by a private option. */
     char *pixel_format; /**< Set by a private option. */
+    char *framerate;    /**< Set by a private option. */
 };
 
 struct buff_data {
@@ -453,13 +454,20 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
     struct v4l2_standard standard;
     struct v4l2_streamparm streamparm = { 0 };
     struct v4l2_fract *tpf = &streamparm.parm.capture.timeperframe;
-    int i;
+    int i, ret;
+    AVRational fps;
 
     streamparm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 
+    if (s->framerate && (ret = av_parse_video_rate(&fps, s->framerate)) < 0) {
+        av_log(s1, AV_LOG_ERROR, "Couldn't parse framerate.\n");
+        return ret;
+    }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->channel > 0)
         s->channel = ap->channel;
+    if (ap->time_base.num)
+        fps = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
 
     /* set tv video input */
@@ -512,34 +520,32 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
         }
     }
 
-    if (ap->time_base.num && ap->time_base.den) {
+    if (fps.num && fps.den) {
         av_log(s1, AV_LOG_DEBUG, "Setting time per frame to %d/%d\n",
-               ap->time_base.num, ap->time_base.den);
-        tpf->numerator = ap->time_base.num;
-        tpf->denominator = ap->time_base.den;
+               fps.den, fps.num);
+        tpf->numerator   = fps.den;
+        tpf->denominator = fps.num;
         if (ioctl(s->fd, VIDIOC_S_PARM, &streamparm) != 0) {
             av_log(s1, AV_LOG_ERROR,
                    "ioctl set time per frame(%d/%d) failed\n",
-                   ap->time_base.num, ap->time_base.den);
+                   fps.den, fps.num);
             return AVERROR(EIO);
         }
 
-        if (ap->time_base.den != tpf->denominator ||
-            ap->time_base.num != tpf->numerator) {
+        if (fps.num != tpf->denominator ||
+            fps.den != tpf->numerator) {
             av_log(s1, AV_LOG_INFO,
                    "The driver changed the time per frame from %d/%d to %d/%d\n",
-                   ap->time_base.num, ap->time_base.den,
+                   fps.den, fps.num,
                    tpf->numerator, tpf->denominator);
         }
     } else {
-        /* if timebase value is not set in ap, read the timebase value from the driver */
+        /* if timebase value is not set, read the timebase value from the driver */
         if (ioctl(s->fd, VIDIOC_G_PARM, &streamparm) != 0) {
             av_log(s1, AV_LOG_ERROR, "ioctl(VIDIOC_G_PARM): %s\n", strerror(errno));
             return AVERROR(errno);
         }
     }
-    ap->time_base.num = tpf->numerator;
-    ap->time_base.den = tpf->denominator;
 
     return 0;
 }
@@ -681,6 +687,7 @@ out:
     av_freep(&s->video_size);
     av_freep(&s->pixel_format);
     av_freep(&s->standard);
+    av_freep(&s->framerate);
     return res;
 }
 
@@ -731,6 +738,7 @@ static const AVOption options[] = {
     { "channel",  "", offsetof(struct video_data, channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { NULL },
 };
 

From 4078ed26312adb9adc1948556464705011b28e67 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 24 May 2011 21:16:47 +0200
Subject: [PATCH 523/830] vfwcap: add a framerate private option.

---
 libavdevice/vfwcap.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index d307e11184..c021831945 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c
@@ -45,6 +45,7 @@ struct vfw_ctx {
     unsigned int curbufsize;
     unsigned int frame_num;
     char *video_size;       /**< A string describing video size, set by a private option. */
+    char *framerate;        /**< Set by a private option. */
 };
 
 static enum PixelFormat vfw_pixfmt(DWORD biCompression, WORD biBitCount)
@@ -236,6 +237,7 @@ static int vfw_read_close(AVFormatContext *s)
     }
 
     av_freep(&ctx->video_size);
+    av_freep(&ctx->framerate);
 
     return 0;
 }
@@ -252,6 +254,7 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
     DWORD biCompression;
     WORD biBitCount;
     int ret;
+    AVRational fps;
 
     if (!strcmp(s->filename, "list")) {
         for (devnum = 0; devnum <= 9; devnum++) {
@@ -269,10 +272,10 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
         return AVERROR(EIO);
     }
 
-    if(!ap->time_base.den) {
-        av_log(s, AV_LOG_ERROR, "A time base must be specified.\n");
-        return AVERROR(EIO);
-    }
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->time_base.num)
+        fps = (AVRational){ap->time_base.den, ap->time_base.num};
+#endif
 
     ctx->hwnd = capCreateCaptureWindow(NULL, 0, 0, 0, 0, 0, HWND_MESSAGE, 0);
     if(!ctx->hwnd) {
@@ -371,7 +374,7 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
 
     cparms.fYield = 1; // Spawn a background thread
     cparms.dwRequestMicroSecPerFrame =
-                               (ap->time_base.num*1000000) / ap->time_base.den;
+                               (fps.den*1000000) / fps.num;
     cparms.fAbortLeftMouse = 0;
     cparms.fAbortRightMouse = 0;
     cparms.fCaptureAudio = 0;
@@ -383,7 +386,7 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
         goto fail_io;
 
     codec = st->codec;
-    codec->time_base = ap->time_base;
+    codec->time_base = (AVRational){fps.den, fps.num};
     codec->codec_type = AVMEDIA_TYPE_VIDEO;
     codec->width  = bi->bmiHeader.biWidth;
     codec->height = bi->bmiHeader.biHeight;
@@ -471,6 +474,7 @@ static int vfw_read_packet(AVFormatContext *s, AVPacket *pkt)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "ntsc"}, 0, 0, DEC },
     { NULL },
 };
 

From fefa67d536346b39973ab70b892d8ef27215b0b3 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 29 May 2011 15:22:45 +0200
Subject: [PATCH 524/830] v4l2: remove one forgotten use of
 AVFormatParameters.pix_fmt.

---
 libavdevice/v4l2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index d9dce33b9e..0cd4f38389 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -642,7 +642,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     desired_format = device_try_init(s1, pix_fmt, &s->width, &s->height, &codec_id);
     if (desired_format == 0) {
         av_log(s1, AV_LOG_ERROR, "Cannot find a proper format for "
-               "codec_id %d, pix_fmt %d.\n", s1->video_codec_id, ap->pix_fmt);
+               "codec_id %d, pix_fmt %d.\n", s1->video_codec_id, pix_fmt);
         close(s->fd);
 
         res = AVERROR(EIO);

From 551dfdde70f912d0278730b28d480e32a49b1b5e Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 2 Jun 2011 21:07:53 +0100
Subject: [PATCH 525/830] targa: fix big-endian build

---
 libavcodec/targa.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/targa.c b/libavcodec/targa.c
index 6a852a8528..e57fd8ba65 100644
--- a/libavcodec/targa.c
+++ b/libavcodec/targa.c
@@ -210,6 +210,7 @@ static int decode_frame(AVCodecContext *avctx,
             CHECK_BUFFER_SIZE(buf, buf_end, img_size, "image data");
             for(y = 0; y < s->height; y++){
 #if HAVE_BIGENDIAN
+                int x;
                 if((s->bpp + 1) >> 3 == 2){
                     uint16_t *dst16 = (uint16_t*)dst;
                     for(x = 0; x < s->width; x++)

From b751f611065f1fe1d7216971c4b100c928a3b0d5 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 2 Jun 2011 20:40:09 +0200
Subject: [PATCH 526/830] Remove stray extra arguments from av_dlog()
 invocations.

---
 libavformat/mpeg.c    |  2 +-
 libavformat/mpegenc.c |  2 +-
 libavformat/mpegts.c  |  2 +-
 libavformat/oggdec.c  | 10 ++++------
 4 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/libavformat/mpeg.c b/libavformat/mpeg.c
index 0b663ab512..98506612cd 100644
--- a/libavformat/mpeg.c
+++ b/libavformat/mpeg.c
@@ -569,7 +569,7 @@ static int mpegps_read_packet(AVFormatContext *s,
     pkt->dts = dts;
     pkt->pos = dummy_pos;
     pkt->stream_index = st->index;
-    av_dlog(s, AV_LOG_DEBUG, "%d: pts=%0.3f dts=%0.3f size=%d\n",
+    av_dlog(s, "%d: pts=%0.3f dts=%0.3f size=%d\n",
             pkt->stream_index, pkt->pts / 90000.0, pkt->dts / 90000.0,
             pkt->size);
 
diff --git a/libavformat/mpegenc.c b/libavformat/mpegenc.c
index 28ca1cbd5b..98169abcf5 100644
--- a/libavformat/mpegenc.c
+++ b/libavformat/mpegenc.c
@@ -1075,7 +1075,7 @@ retry:
                 best_dts= pkt_desc->dts;
         }
 
-        av_dlog(ctx, AV_LOG_DEBUG, "bumping scr, scr:%f, dts:%f\n",
+        av_dlog(ctx, "bumping scr, scr:%f, dts:%f\n",
                 scr / 90000.0, best_dts / 90000.0);
         if(best_dts == INT64_MAX)
             return 0;
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 7c0f3818d5..33675f470f 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -1555,7 +1555,7 @@ static int mpegts_read_header(AVFormatContext *s,
         s->bit_rate = (TS_PACKET_SIZE * 8) * 27e6 / ts->pcr_incr;
         st->codec->bit_rate = s->bit_rate;
         st->start_time = ts->cur_pcr;
-        av_dlog(ts->stream, AV_LOG_DEBUG, "start=%0.3f pcr=%0.3f incr=%d\n",
+        av_dlog(ts->stream, "start=%0.3f pcr=%0.3f incr=%d\n",
                 st->start_time / 1000000.0, pcrs[0] / 27e6, ts->pcr_incr);
     }
 
diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index 9562ea9ec0..f1ad630c5e 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -310,7 +310,7 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
     int complete = 0;
     int segp = 0, psize = 0;
 
-    av_dlog(s, AV_LOG_DEBUG, "ogg_packet: curidx=%i\n", ogg->curidx);
+    av_dlog(s, "ogg_packet: curidx=%i\n", ogg->curidx);
 
     do{
         idx = ogg->curidx;
@@ -322,8 +322,7 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
 
         os = ogg->streams + idx;
 
-        av_dlog(s, AV_LOG_DEBUG,
-                "ogg_packet: idx=%d pstart=%d psize=%d segp=%d nsegs=%d\n",
+        av_dlog(s, "ogg_packet: idx=%d pstart=%d psize=%d segp=%d nsegs=%d\n",
                 idx, os->pstart, os->psize, os->segp, os->nsegs);
 
         if (!os->codec){
@@ -356,8 +355,7 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
         }
     }while (!complete);
 
-    av_dlog(s, AV_LOG_DEBUG,
-            "ogg_packet: idx %i, frame size %i, start %i\n",
+    av_dlog(s, "ogg_packet: idx %i, frame size %i, start %i\n",
             idx, os->psize, os->pstart);
 
     if (os->granule == -1)
@@ -438,7 +436,7 @@ static int ogg_get_headers(AVFormatContext *s)
             return -1;
     }while (!ogg->headers);
 
-    av_dlog(s, AV_LOG_DEBUG, "found headers\n");
+    av_dlog(s, "found headers\n");
 
     return 0;
 }

From b932eb1be62b47b29969667f14a207e425e79a55 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 2 Jun 2011 19:00:47 +0200
Subject: [PATCH 527/830] swscale: reintroduce sws_format_name() symbol

Reintroduce the internal symbol which was removed in:

commit e1197b9e1746c03b1d13d816d1569aeaf1b71ecc
Author: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date:   Sun May 29 17:57:40 2011 +0200

    swscale: remove sws_format_name()

    Use av_get_pix_fmt_name() instead.

The symbol is used by some external libs (hi libx264!). Reintroducing it
gives them time to switch to the recently added av_get_pix_fmt_name()
rather than relying on an internal symbol.
---
 libswscale/swscale.h          | 5 ++++-
 libswscale/swscale_internal.h | 8 ++++++++
 libswscale/utils.c            | 7 +++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 7ac2fe27bb..e798773158 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -31,7 +31,7 @@
 
 #define LIBSWSCALE_VERSION_MAJOR 0
 #define LIBSWSCALE_VERSION_MINOR 14
-#define LIBSWSCALE_VERSION_MICRO 0
+#define LIBSWSCALE_VERSION_MICRO 1
 
 #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
                                                LIBSWSCALE_VERSION_MINOR, \
@@ -53,6 +53,9 @@
 #ifndef FF_API_SWS_CPU_CAPS
 #define FF_API_SWS_CPU_CAPS    (LIBSWSCALE_VERSION_MAJOR < 2)
 #endif
+#ifndef FF_API_SWS_FORMAT_NAME
+#define FF_API_SWS_FORMAT_NAME  (LIBSWSCALE_VERSION_MAJOR < 2)
+#endif
 
 /**
  * Returns the LIBSWSCALE_VERSION_INT constant.
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 87712be1b3..77a91e12e0 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -349,6 +349,14 @@ void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
                             const int16_t **chrVSrc, int chrFilterSize,
                             uint8_t *dest, int dstW, int dstY);
 
+#if FF_API_SWS_FORMAT_NAME
+/**
+ * @deprecated Use av_get_pix_fmt_name() instead.
+ */
+attribute_deprecated
+const char *sws_format_name(enum PixelFormat format);
+#endif
+
 //FIXME replace this with something faster
 #define is16BPS(x)      (           \
            (x)==PIX_FMT_GRAY16BE    \
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 3801ea2fd7..d8c2c0818c 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -165,6 +165,13 @@ int sws_isSupportedOutput(enum PixelFormat pix_fmt)
 
 extern const int32_t ff_yuv2rgb_coeffs[8][4];
 
+#if FF_API_SWS_FORMAT_NAME
+const char *sws_format_name(enum PixelFormat format)
+{
+    return av_get_pix_fmt_name(format);
+}
+#endif
+
 static double getSplineCoeff(double a, double b, double c, double d, double dist)
 {
     if (dist<=1.0) return ((d*dist + c)*dist + b)*dist +a;

From e977ca2645cc6b23589ddf97ab08861064ba8792 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 2 Jun 2011 17:45:33 +0200
Subject: [PATCH 528/830] lavfi: add avfilter_link_free() function

Allow freeing the buffers cached in each AVFilterLink pool.
Fix leak.
---
 doc/APIchanges         |  3 +++
 libavfilter/avfilter.c | 29 +++++++++++++++++++++++++++--
 libavfilter/avfilter.h |  7 ++++++-
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index f88b7af7c1..937846ec62 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-03 - xxxxxx - lavfi 2.12.0 - avfilter_link_free()
+  Add avfilter_link_free() function.
+
 2011-05-28 - xxxxxx - lavu 51.3.0 - pixdesc.h
   Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
   avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index b7ad6f0503..037d5864ae 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -165,6 +165,31 @@ int avfilter_link(AVFilterContext *src, unsigned srcpad,
     return 0;
 }
 
+void avfilter_link_free(AVFilterLink **link)
+{
+    if (!*link)
+        return;
+
+    if ((*link)->pool) {
+        int i;
+        for (i = 0; i < POOL_SIZE; i++) {
+            if ((*link)->pool->pic[i]) {
+                AVFilterBufferRef *picref = (*link)->pool->pic[i];
+                /* free buffer: picrefs stored in the pool are not
+                 * supposed to contain a free callback */
+                av_freep(&picref->buf->data[0]);
+                av_freep(&picref->buf);
+
+                av_freep(&picref->audio);
+                av_freep(&picref->video);
+                av_freep(&picref);
+            }
+        }
+        av_freep(&(*link)->pool);
+    }
+    av_freep(link);
+}
+
 int avfilter_insert_filter(AVFilterLink *link, AVFilterContext *filt,
                            unsigned filt_srcpad_idx, unsigned filt_dstpad_idx)
 {
@@ -683,7 +708,7 @@ void avfilter_free(AVFilterContext *filter)
             avfilter_formats_unref(&link->in_formats);
             avfilter_formats_unref(&link->out_formats);
         }
-        av_freep(&link);
+        avfilter_link_free(&link);
     }
     for (i = 0; i < filter->output_count; i++) {
         if ((link = filter->outputs[i])) {
@@ -692,7 +717,7 @@ void avfilter_free(AVFilterContext *filter)
             avfilter_formats_unref(&link->in_formats);
             avfilter_formats_unref(&link->out_formats);
         }
-        av_freep(&link);
+        avfilter_link_free(&link);
     }
 
     av_freep(&filter->name);
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 602b2437d9..e8e2a8b9d1 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR 11
+#define LIBAVFILTER_VERSION_MINOR 12
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
@@ -635,6 +635,11 @@ struct AVFilterLink {
 int avfilter_link(AVFilterContext *src, unsigned srcpad,
                   AVFilterContext *dst, unsigned dstpad);
 
+/**
+ * Free the link in *link, and set its pointer to NULL.
+ */
+void avfilter_link_free(AVFilterLink **link);
+
 /**
  * Negotiate the media format, dimensions, etc of all inputs to a filter.
  *

From 752207e36bd9ee0df0183c5ecb0a53a12caea5df Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 2 Jun 2011 22:46:11 +0200
Subject: [PATCH 529/830] cook: Remove unused debug functions.

---
 libavcodec/cook.c | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index 79aee2e5eb..dfba2c553b 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -166,38 +166,6 @@ typedef struct cook {
 static float     pow2tab[127];
 static float rootpow2tab[127];
 
-/* debug functions */
-
-#ifdef COOKDEBUG
-static void dump_float_table(float* table, int size, int delimiter) {
-    int i=0;
-    av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i);
-    for (i=0 ; i<size ; i++) {
-        av_log(NULL, AV_LOG_ERROR, "%5.1f, ", table[i]);
-        if ((i+1)%delimiter == 0) av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i+1);
-    }
-}
-
-static void dump_int_table(int* table, int size, int delimiter) {
-    int i=0;
-    av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i);
-    for (i=0 ; i<size ; i++) {
-        av_log(NULL, AV_LOG_ERROR, "%d, ", table[i]);
-        if ((i+1)%delimiter == 0) av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i+1);
-    }
-}
-
-static void dump_short_table(short* table, int size, int delimiter) {
-    int i=0;
-    av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i);
-    for (i=0 ; i<size ; i++) {
-        av_log(NULL, AV_LOG_ERROR, "%d, ", table[i]);
-        if ((i+1)%delimiter == 0) av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i+1);
-    }
-}
-
-#endif
-
 /*************** init functions ***************/
 
 /* table generator */

From df96f22d8f2b0b40d5511cde1c4cbb4a6f824c32 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 2 Jun 2011 22:49:53 +0200
Subject: [PATCH 530/830] Replace custom debug output functions by av_dlog().

---
 libavcodec/interplayvideo.c |  24 ++---
 libavcodec/vorbisdec.c      | 194 +++++++++++++++++++-----------------
 libavcodec/vqavideo.c       |  20 ++--
 libavformat/ipmovie.c       | 104 +++++++++----------
 4 files changed, 161 insertions(+), 181 deletions(-)

diff --git a/libavcodec/interplayvideo.c b/libavcodec/interplayvideo.c
index c12b241fcb..3bbb464aff 100644
--- a/libavcodec/interplayvideo.c
+++ b/libavcodec/interplayvideo.c
@@ -46,14 +46,6 @@
 
 #define PALETTE_COUNT 256
 
-/* debugging support */
-#define DEBUG_INTERPLAY 0
-#if DEBUG_INTERPLAY
-#define debug_interplay(x,...) av_log(NULL, AV_LOG_DEBUG, x, __VA_ARGS__)
-#else
-static inline void debug_interplay(const char *format, ...) { }
-#endif
-
 typedef struct IpvideoContext {
 
     AVCodecContext *avctx;
@@ -141,7 +133,7 @@ static int ipvideo_decode_block_opcode_0x2(IpvideoContext *s)
         y =   8 + ((B - 56) / 29);
     }
 
-    debug_interplay ("    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
+    av_dlog(NULL, "    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
     return copy_from(s, &s->second_last_frame, x, y);
 }
 
@@ -169,7 +161,7 @@ static int ipvideo_decode_block_opcode_0x3(IpvideoContext *s)
         y = -(  8 + ((B - 56) / 29));
     }
 
-    debug_interplay ("    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
+    av_dlog(NULL, "    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
     return copy_from(s, &s->current_frame, x, y);
 }
 
@@ -192,7 +184,7 @@ static int ipvideo_decode_block_opcode_0x4(IpvideoContext *s)
     x = -8 + BL;
     y = -8 + BH;
 
-    debug_interplay ("    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
+    av_dlog(NULL, "    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
     return copy_from(s, &s->last_frame, x, y);
 }
 
@@ -207,7 +199,7 @@ static int ipvideo_decode_block_opcode_0x5(IpvideoContext *s)
     x = *s->stream_ptr++;
     y = *s->stream_ptr++;
 
-    debug_interplay ("    motion bytes = %d, %d\n", x, y);
+    av_dlog(NULL, "    motion bytes = %d, %d\n", x, y);
     return copy_from(s, &s->last_frame, x, y);
 }
 
@@ -588,7 +580,7 @@ static int ipvideo_decode_block_opcode_0x6_16(IpvideoContext *s)
     x = *s->stream_ptr++;
     y = *s->stream_ptr++;
 
-    debug_interplay ("    motion bytes = %d, %d\n", x, y);
+    av_dlog(NULL, "    motion bytes = %d, %d\n", x, y);
     return copy_from(s, &s->second_last_frame, x, y);
 }
 
@@ -965,7 +957,7 @@ static void ipvideo_decode_opcodes(IpvideoContext *s)
     static int frame = 0;
     GetBitContext gb;
 
-    debug_interplay("------------------ frame %d\n", frame);
+    av_dlog(NULL, "------------------ frame %d\n", frame);
     frame++;
 
     if (!s->is_16bpp) {
@@ -991,8 +983,8 @@ static void ipvideo_decode_opcodes(IpvideoContext *s)
         for (x = 0; x < s->avctx->width; x += 8) {
             opcode = get_bits(&gb, 4);
 
-            debug_interplay("  block @ (%3d, %3d): encoding 0x%X, data ptr @ %p\n",
-                            x, y, opcode, s->stream_ptr);
+            av_dlog(NULL, "  block @ (%3d, %3d): encoding 0x%X, data ptr @ %p\n",
+                    x, y, opcode, s->stream_ptr);
 
             if (!s->is_16bpp) {
                 s->pixel_ptr = s->current_frame.data[0] + x
diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index f6ec74f4a1..c758a440ed 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -20,10 +20,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#undef V_DEBUG
-//#define V_DEBUG
-//#define AV_DEBUG(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__)
-
 #include <math.h>
 
 #define ALT_BITSTREAM_READER_LE
@@ -41,10 +37,6 @@
 #define V_MAX_VLCS (1 << 16)
 #define V_MAX_PARTITIONS (1 << 20)
 
-#ifndef V_DEBUG
-#define AV_DEBUG(...)
-#endif
-
 #undef NDEBUG
 #include <assert.h>
 
@@ -245,7 +237,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
 
     vc->codebook_count = get_bits(gb, 8) + 1;
 
-    AV_DEBUG(" Codebooks: %d \n", vc->codebook_count);
+    av_dlog(NULL, " Codebooks: %d \n", vc->codebook_count);
 
     vc->codebooks = av_mallocz(vc->codebook_count * sizeof(*vc->codebooks));
     tmp_vlc_bits  = av_mallocz(V_MAX_VLCS * sizeof(*tmp_vlc_bits));
@@ -256,7 +248,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
         vorbis_codebook *codebook_setup = &vc->codebooks[cb];
         unsigned ordered, t, entries, used_entries = 0;
 
-        AV_DEBUG(" %u. Codebook\n", cb);
+        av_dlog(NULL, " %u. Codebook\n", cb);
 
         if (get_bits(gb, 24) != 0x564342) {
             av_log(vc->avccontext, AV_LOG_ERROR,
@@ -281,17 +273,17 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
 
         ordered = get_bits1(gb);
 
-        AV_DEBUG(" codebook_dimensions %d, codebook_entries %u\n",
-                 codebook_setup->dimensions, entries);
+        av_dlog(NULL, " codebook_dimensions %d, codebook_entries %u\n",
+                codebook_setup->dimensions, entries);
 
         if (!ordered) {
             unsigned ce, flag;
             unsigned sparse = get_bits1(gb);
 
-            AV_DEBUG(" not ordered \n");
+            av_dlog(NULL, " not ordered \n");
 
             if (sparse) {
-                AV_DEBUG(" sparse \n");
+                av_dlog(NULL, " sparse \n");
 
                 used_entries = 0;
                 for (ce = 0; ce < entries; ++ce) {
@@ -303,7 +295,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
                         tmp_vlc_bits[ce] = 0;
                 }
             } else {
-                AV_DEBUG(" not sparse \n");
+                av_dlog(NULL, " not sparse \n");
 
                 used_entries = entries;
                 for (ce = 0; ce < entries; ++ce)
@@ -313,17 +305,17 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
             unsigned current_entry  = 0;
             unsigned current_length = get_bits(gb, 5) + 1;
 
-            AV_DEBUG(" ordered, current length: %u\n", current_length);  //FIXME
+            av_dlog(NULL, " ordered, current length: %u\n", current_length);  //FIXME
 
             used_entries = entries;
             for (; current_entry < used_entries && current_length <= 32; ++current_length) {
                 unsigned i, number;
 
-                AV_DEBUG(" number bits: %u ", ilog(entries - current_entry));
+                av_dlog(NULL, " number bits: %u ", ilog(entries - current_entry));
 
                 number = get_bits(gb, ilog(entries - current_entry));
 
-                AV_DEBUG(" number: %u\n", number);
+                av_dlog(NULL, " number: %u\n", number);
 
                 for (i = current_entry; i < number+current_entry; ++i)
                     if (i < used_entries)
@@ -339,7 +331,8 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
 
         codebook_setup->lookup_type = get_bits(gb, 4);
 
-        AV_DEBUG(" lookup type: %d : %s \n", codebook_setup->lookup_type, codebook_setup->lookup_type ? "vq" : "no lookup");
+        av_dlog(NULL, " lookup type: %d : %s \n", codebook_setup->lookup_type,
+                codebook_setup->lookup_type ? "vq" : "no lookup");
 
 // If the codebook is used for (inverse) VQ, calculate codevectors.
 
@@ -352,14 +345,17 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
             unsigned codebook_value_bits = get_bits(gb, 4) + 1;
             unsigned codebook_sequence_p = get_bits1(gb);
 
-            AV_DEBUG(" We expect %d numbers for building the codevectors. \n", codebook_lookup_values);
-            AV_DEBUG("  delta %f minmum %f \n", codebook_delta_value, codebook_minimum_value);
+            av_dlog(NULL, " We expect %d numbers for building the codevectors. \n",
+                    codebook_lookup_values);
+            av_dlog(NULL, "  delta %f minmum %f \n",
+                    codebook_delta_value, codebook_minimum_value);
 
             for (i = 0; i < codebook_lookup_values; ++i) {
                 codebook_multiplicands[i] = get_bits(gb, codebook_value_bits);
 
-                AV_DEBUG(" multiplicands*delta+minmum : %e \n", (float)codebook_multiplicands[i]*codebook_delta_value+codebook_minimum_value);
-                AV_DEBUG(" multiplicand %u\n", codebook_multiplicands[i]);
+                av_dlog(NULL, " multiplicands*delta+minmum : %e \n",
+                        (float)codebook_multiplicands[i] * codebook_delta_value + codebook_minimum_value);
+                av_dlog(NULL, " multiplicand %u\n", codebook_multiplicands[i]);
             }
 
 // Weed out unused vlcs and build codevector vector
@@ -453,8 +449,8 @@ static int vorbis_parse_setup_hdr_tdtransforms(vorbis_context *vc)
     for (i = 0; i < vorbis_time_count; ++i) {
         unsigned vorbis_tdtransform = get_bits(gb, 16);
 
-        AV_DEBUG(" Vorbis time domain transform %u: %u\n",
-                 vorbis_time_count, vorbis_tdtransform);
+        av_dlog(NULL, " Vorbis time domain transform %u: %u\n",
+                vorbis_time_count, vorbis_tdtransform);
 
         if (vorbis_tdtransform) {
             av_log(vc->avccontext, AV_LOG_ERROR, "Vorbis time domain transform data nonzero. \n");
@@ -485,7 +481,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
 
         floor_setup->floor_type = get_bits(gb, 16);
 
-        AV_DEBUG(" %d. floor type %d \n", i, floor_setup->floor_type);
+        av_dlog(NULL, " %d. floor type %d \n", i, floor_setup->floor_type);
 
         if (floor_setup->floor_type == 1) {
             int maximum_class = -1;
@@ -495,29 +491,33 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
 
             floor_setup->data.t1.partitions = get_bits(gb, 5);
 
-            AV_DEBUG(" %d.floor: %d partitions \n", i, floor_setup->data.t1.partitions);
+            av_dlog(NULL, " %d.floor: %d partitions \n",
+                    i, floor_setup->data.t1.partitions);
 
             for (j = 0; j < floor_setup->data.t1.partitions; ++j) {
                 floor_setup->data.t1.partition_class[j] = get_bits(gb, 4);
                 if (floor_setup->data.t1.partition_class[j] > maximum_class)
                     maximum_class = floor_setup->data.t1.partition_class[j];
 
-                AV_DEBUG(" %d. floor %d partition class %d \n", i, j, floor_setup->data.t1.partition_class[j]);
+                av_dlog(NULL, " %d. floor %d partition class %d \n",
+                        i, j, floor_setup->data.t1.partition_class[j]);
 
             }
 
-            AV_DEBUG(" maximum class %d \n", maximum_class);
+            av_dlog(NULL, " maximum class %d \n", maximum_class);
 
             for (j = 0; j <= maximum_class; ++j) {
                 floor_setup->data.t1.class_dimensions[j] = get_bits(gb, 3) + 1;
                 floor_setup->data.t1.class_subclasses[j] = get_bits(gb, 2);
 
-                AV_DEBUG(" %d floor %d class dim: %d subclasses %d \n", i, j, floor_setup->data.t1.class_dimensions[j], floor_setup->data.t1.class_subclasses[j]);
+                av_dlog(NULL, " %d floor %d class dim: %d subclasses %d \n", i, j,
+                        floor_setup->data.t1.class_dimensions[j],
+                        floor_setup->data.t1.class_subclasses[j]);
 
                 if (floor_setup->data.t1.class_subclasses[j]) {
                     GET_VALIDATED_INDEX(floor_setup->data.t1.class_masterbook[j], 8, vc->codebook_count)
 
-                    AV_DEBUG("   masterbook: %d \n", floor_setup->data.t1.class_masterbook[j]);
+                    av_dlog(NULL, "   masterbook: %d \n", floor_setup->data.t1.class_masterbook[j]);
                 }
 
                 for (k = 0; k < (1 << floor_setup->data.t1.class_subclasses[j]); ++k) {
@@ -526,7 +526,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
                         VALIDATE_INDEX(bits, vc->codebook_count)
                     floor_setup->data.t1.subclass_books[j][k] = bits;
 
-                    AV_DEBUG("    book %d. : %d \n", k, floor_setup->data.t1.subclass_books[j][k]);
+                    av_dlog(NULL, "    book %d. : %d \n", k, floor_setup->data.t1.subclass_books[j][k]);
                 }
             }
 
@@ -555,8 +555,8 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
                 for (k = 0; k < floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]]; ++k, ++floor1_values) {
                     floor_setup->data.t1.list[floor1_values].x = get_bits(gb, rangebits);
 
-                    AV_DEBUG(" %u. floor1 Y coord. %d\n", floor1_values,
-                             floor_setup->data.t1.list[floor1_values].x);
+                    av_dlog(NULL, " %u. floor1 Y coord. %d\n", floor1_values,
+                            floor_setup->data.t1.list[floor1_values].x);
                 }
             }
 
@@ -609,24 +609,24 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
                 return -1;
 
 #ifdef V_DEBUG /* debug output parsed headers */
-            AV_DEBUG("floor0 order: %u\n", floor_setup->data.t0.order);
-            AV_DEBUG("floor0 rate: %u\n", floor_setup->data.t0.rate);
-            AV_DEBUG("floor0 bark map size: %u\n",
-                     floor_setup->data.t0.bark_map_size);
-            AV_DEBUG("floor0 amplitude bits: %u\n",
-                     floor_setup->data.t0.amplitude_bits);
-            AV_DEBUG("floor0 amplitude offset: %u\n",
-                     floor_setup->data.t0.amplitude_offset);
-            AV_DEBUG("floor0 number of books: %u\n",
-                     floor_setup->data.t0.num_books);
-            AV_DEBUG("floor0 book list pointer: %p\n",
-                     floor_setup->data.t0.book_list);
+            /* debug output parsed headers */
+            av_dlog(NULL, "floor0 order: %u\n", floor_setup->data.t0.order);
+            av_dlog(NULL, "floor0 rate: %u\n", floor_setup->data.t0.rate);
+            av_dlog(NULL, "floor0 bark map size: %u\n",
+                    floor_setup->data.t0.bark_map_size);
+            av_dlog(NULL, "floor0 amplitude bits: %u\n",
+                    floor_setup->data.t0.amplitude_bits);
+            av_dlog(NULL, "floor0 amplitude offset: %u\n",
+                    floor_setup->data.t0.amplitude_offset);
+            av_dlog(NULL, "floor0 number of books: %u\n",
+                    floor_setup->data.t0.num_books);
+            av_dlog(NULL, "floor0 book list pointer: %p\n",
+                    floor_setup->data.t0.book_list);
             {
                 int idx;
                 for (idx = 0; idx < floor_setup->data.t0.num_books; ++idx) {
-                    AV_DEBUG("  Book %d: %u\n",
-                             idx+1,
-                             floor_setup->data.t0.book_list[idx]);
+                    av_dlog(NULL, "  Book %d: %u\n", idx + 1,
+                            floor_setup->data.t0.book_list[idx]);
                 }
             }
 #endif
@@ -648,7 +648,7 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
     vc->residue_count = get_bits(gb, 6)+1;
     vc->residues      = av_mallocz(vc->residue_count * sizeof(*vc->residues));
 
-    AV_DEBUG(" There are %d residues. \n", vc->residue_count);
+    av_dlog(NULL, " There are %d residues. \n", vc->residue_count);
 
     for (i = 0; i < vc->residue_count; ++i) {
         vorbis_residue *res_setup = &vc->residues[i];
@@ -657,7 +657,7 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
 
         res_setup->type = get_bits(gb, 16);
 
-        AV_DEBUG(" %u. residue type %d\n", i, res_setup->type);
+        av_dlog(NULL, " %u. residue type %d\n", i, res_setup->type);
 
         res_setup->begin          = get_bits(gb, 24);
         res_setup->end            = get_bits(gb, 24);
@@ -684,8 +684,9 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
         if (!res_setup->classifs)
             return AVERROR(ENOMEM);
 
-        AV_DEBUG("    begin %d end %d part.size %d classif.s %d classbook %d \n", res_setup->begin, res_setup->end, res_setup->partition_size,
-          res_setup->classifications, res_setup->classbook);
+        av_dlog(NULL, "    begin %d end %d part.size %d classif.s %d classbook %d \n",
+                res_setup->begin, res_setup->end, res_setup->partition_size,
+                res_setup->classifications, res_setup->classbook);
 
         for (j = 0; j < res_setup->classifications; ++j) {
             high_bits = 0;
@@ -694,7 +695,7 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
                 high_bits = get_bits(gb, 5);
             cascade[j] = (high_bits << 3) + low_bits;
 
-            AV_DEBUG("     %u class cascade depth: %d\n", j, ilog(cascade[j]));
+            av_dlog(NULL, "     %u class cascade depth: %d\n", j, ilog(cascade[j]));
         }
 
         res_setup->maxpass = 0;
@@ -703,8 +704,8 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
                 if (cascade[j]&(1 << k)) {
                     GET_VALIDATED_INDEX(res_setup->books[j][k], 8, vc->codebook_count)
 
-                    AV_DEBUG("     %u class cascade depth %u book: %d\n",
-                             j, k, res_setup->books[j][k]);
+                    av_dlog(NULL, "     %u class cascade depth %u book: %d\n",
+                            j, k, res_setup->books[j][k]);
 
                     if (k>res_setup->maxpass)
                         res_setup->maxpass = k;
@@ -727,7 +728,7 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc)
     vc->mapping_count = get_bits(gb, 6)+1;
     vc->mappings      = av_mallocz(vc->mapping_count * sizeof(*vc->mappings));
 
-    AV_DEBUG(" There are %d mappings. \n", vc->mapping_count);
+    av_dlog(NULL, " There are %d mappings. \n", vc->mapping_count);
 
     for (i = 0; i < vc->mapping_count; ++i) {
         vorbis_mapping *mapping_setup = &vc->mappings[i];
@@ -756,8 +757,8 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc)
             mapping_setup->coupling_steps = 0;
         }
 
-        AV_DEBUG("   %u mapping coupling steps: %d\n",
-                 i, mapping_setup->coupling_steps);
+        av_dlog(NULL, "   %u mapping coupling steps: %d\n",
+                i, mapping_setup->coupling_steps);
 
         if (get_bits(gb, 2)) {
             av_log(vc->avccontext, AV_LOG_ERROR, "%u. mapping setup data invalid.\n", i);
@@ -776,10 +777,9 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc)
             GET_VALIDATED_INDEX(mapping_setup->submap_floor[j],   8, vc->floor_count)
             GET_VALIDATED_INDEX(mapping_setup->submap_residue[j], 8, vc->residue_count)
 
-            AV_DEBUG("   %u mapping %u submap : floor %d, residue %d\n",
-                     i, j,
-                     mapping_setup->submap_floor[j],
-                     mapping_setup->submap_residue[j]);
+            av_dlog(NULL, "   %u mapping %u submap : floor %d, residue %d\n", i, j,
+                    mapping_setup->submap_floor[j],
+                    mapping_setup->submap_residue[j]);
         }
     }
     return 0;
@@ -816,8 +816,7 @@ static void create_map(vorbis_context *vc, unsigned floor_number)
 
 #ifdef V_DEBUG
     for (idx = 0; idx <= n; ++idx) {
-        AV_DEBUG("floor0 map: map at pos %d is %d\n",
-                 idx, map[idx]);
+        av_dlog(NULL, "floor0 map: map at pos %d is %d\n", idx, map[idx]);
     }
 #endif
 }
@@ -830,7 +829,7 @@ static int vorbis_parse_setup_hdr_modes(vorbis_context *vc)
     vc->mode_count = get_bits(gb, 6) + 1;
     vc->modes      = av_mallocz(vc->mode_count * sizeof(*vc->modes));
 
-    AV_DEBUG(" There are %d modes.\n", vc->mode_count);
+    av_dlog(NULL, " There are %d modes.\n", vc->mode_count);
 
     for (i = 0; i < vc->mode_count; ++i) {
         vorbis_mode *mode_setup = &vc->modes[i];
@@ -840,9 +839,9 @@ static int vorbis_parse_setup_hdr_modes(vorbis_context *vc)
         mode_setup->transformtype = get_bits(gb, 16); //FIXME check
         GET_VALIDATED_INDEX(mode_setup->mapping, 8, vc->mapping_count);
 
-        AV_DEBUG(" %u mode: blockflag %d, windowtype %d, transformtype %d, mapping %d\n",
-                 i, mode_setup->blockflag, mode_setup->windowtype,
-                 mode_setup->transformtype, mode_setup->mapping);
+        av_dlog(NULL, " %u mode: blockflag %d, windowtype %d, transformtype %d, mapping %d\n",
+                i, mode_setup->blockflag, mode_setup->windowtype,
+                mode_setup->transformtype, mode_setup->mapping);
     }
     return 0;
 }
@@ -950,7 +949,7 @@ static int vorbis_parse_id_hdr(vorbis_context *vc)
     ff_mdct_init(&vc->mdct[0], bl0, 1, -vc->scale_bias);
     ff_mdct_init(&vc->mdct[1], bl1, 1, -vc->scale_bias);
 
-    AV_DEBUG(" vorbis version %d \n audio_channels %d \n audio_samplerate %d \n bitrate_max %d \n bitrate_nom %d \n bitrate_min %d \n blk_0 %d blk_1 %d \n ",
+    av_dlog(NULL, " vorbis version %d \n audio_channels %d \n audio_samplerate %d \n bitrate_max %d \n bitrate_nom %d \n bitrate_min %d \n blk_0 %d blk_1 %d \n ",
             vc->version, vc->audio_channels, vc->audio_samplerate, vc->bitrate_maximum, vc->bitrate_nominal, vc->bitrate_minimum, vc->blocksize[0], vc->blocksize[1]);
 
 /*
@@ -1058,7 +1057,7 @@ static int vorbis_floor0_decode(vorbis_context *vc,
                     "floor0 dec: booknumber too high!\n");
             book_idx =  0;
         }
-        AV_DEBUG("floor0 dec: booknumber: %u\n", book_idx);
+        av_dlog(NULL, "floor0 dec: booknumber: %u\n", book_idx);
         codebook = vc->codebooks[vf->book_list[book_idx]];
         /* Invalid codebook! */
         if (!codebook.codevectors)
@@ -1067,13 +1066,13 @@ static int vorbis_floor0_decode(vorbis_context *vc,
         while (lsp_len<vf->order) {
             int vec_off;
 
-            AV_DEBUG("floor0 dec: book dimension: %d\n", codebook.dimensions);
-            AV_DEBUG("floor0 dec: maximum depth: %d\n", codebook.maxdepth);
+            av_dlog(NULL, "floor0 dec: book dimension: %d\n", codebook.dimensions);
+            av_dlog(NULL, "floor0 dec: maximum depth: %d\n", codebook.maxdepth);
             /* read temp vector */
             vec_off = get_vlc2(&vc->gb, codebook.vlc.table,
                                codebook.nb_bits, codebook.maxdepth)
                       * codebook.dimensions;
-            AV_DEBUG("floor0 dec: vector offset: %d\n", vec_off);
+            av_dlog(NULL, "floor0 dec: vector offset: %d\n", vec_off);
             /* copy each vector component and add last to it */
             for (idx = 0; idx < codebook.dimensions; ++idx)
                 lsp[lsp_len+idx] = codebook.codevectors[vec_off+idx] + last;
@@ -1086,7 +1085,7 @@ static int vorbis_floor0_decode(vorbis_context *vc,
         {
             int idx;
             for (idx = 0; idx < lsp_len; ++idx)
-                AV_DEBUG("floor0 dec: coeff at %d is %f\n", idx, lsp[idx]);
+                av_dlog(NULL, "floor0 dec: coeff at %d is %f\n", idx, lsp[idx]);
         }
 #endif
 
@@ -1141,7 +1140,7 @@ static int vorbis_floor0_decode(vorbis_context *vc,
         return 1;
     }
 
-    AV_DEBUG(" Floor0 decoded\n");
+    av_dlog(NULL, " Floor0 decoded\n");
 
     return 0;
 }
@@ -1168,7 +1167,7 @@ static int vorbis_floor1_decode(vorbis_context *vc,
     floor1_Y[0] = get_bits(gb, ilog(range - 1));
     floor1_Y[1] = get_bits(gb, ilog(range - 1));
 
-    AV_DEBUG("floor 0 Y %d floor 1 Y %d \n", floor1_Y[0], floor1_Y[1]);
+    av_dlog(NULL, "floor 0 Y %d floor 1 Y %d \n", floor1_Y[0], floor1_Y[1]);
 
     offset = 2;
     for (i = 0; i < vf->partitions; ++i) {
@@ -1178,7 +1177,7 @@ static int vorbis_floor1_decode(vorbis_context *vc,
         csub = (1 << cbits) - 1;
         cval = 0;
 
-        AV_DEBUG("Cbits %u\n", cbits);
+        av_dlog(NULL, "Cbits %u\n", cbits);
 
         if (cbits) // this reads all subclasses for this partition's class
             cval = get_vlc2(gb, vc->codebooks[vf->class_masterbook[class]].vlc.table,
@@ -1187,8 +1186,8 @@ static int vorbis_floor1_decode(vorbis_context *vc,
         for (j = 0; j < cdim; ++j) {
             book = vf->subclass_books[class][cval & csub];
 
-            AV_DEBUG("book %d Cbits %u cval %u  bits:%d\n",
-                     book, cbits, cval, get_bits_count(gb));
+            av_dlog(NULL, "book %d Cbits %u cval %u  bits:%d\n",
+                    book, cbits, cval, get_bits_count(gb));
 
             cval = cval >> cbits;
             if (book > -1) {
@@ -1198,7 +1197,8 @@ static int vorbis_floor1_decode(vorbis_context *vc,
                 floor1_Y[offset+j] = 0;
             }
 
-            AV_DEBUG(" floor(%d) = %d \n", vf->list[offset+j].x, floor1_Y[offset+j]);
+            av_dlog(NULL, " floor(%d) = %d \n",
+                    vf->list[offset+j].x, floor1_Y[offset+j]);
         }
         offset+=cdim;
     }
@@ -1256,15 +1256,15 @@ static int vorbis_floor1_decode(vorbis_context *vc,
             floor1_Y_final[i] = predicted;
         }
 
-        AV_DEBUG(" Decoded floor(%d) = %u / val %u\n",
-                 vf->list[i].x, floor1_Y_final[i], val);
+        av_dlog(NULL, " Decoded floor(%d) = %u / val %u\n",
+                vf->list[i].x, floor1_Y_final[i], val);
     }
 
 // Curve synth - connect the calculated dots and convert from dB scale FIXME optimize ?
 
     ff_vorbis_floor1_render_list(vf->list, vf->x_list_dim, floor1_Y_final, floor1_flag, vf->multiplier, vec, vf->list[1].x);
 
-    AV_DEBUG(" Floor decoded\n");
+    av_dlog(NULL, " Floor decoded\n");
 
     return 0;
 }
@@ -1295,7 +1295,7 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
         ch_used = ch;
     }
 
-    AV_DEBUG(" residue type 0/1/2 decode begin, ch: %d  cpc %d  \n", ch, c_p_c);
+    av_dlog(NULL, " residue type 0/1/2 decode begin, ch: %d  cpc %d  \n", ch, c_p_c);
 
     for (pass = 0; pass <= vr->maxpass; ++pass) { // FIXME OPTIMIZE?
         uint16_t voffset, partition_count, j_times_ptns_to_read;
@@ -1309,7 +1309,7 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
                         unsigned temp = get_vlc2(gb, vc->codebooks[vr->classbook].vlc.table,
                                                  vc->codebooks[vr->classbook].nb_bits, 3);
 
-                        AV_DEBUG("Classword: %u\n", temp);
+                        av_dlog(NULL, "Classword: %u\n", temp);
 
                         assert(vr->classifications > 1 && temp <= 65536); //needed for inverse[]
                         for (i = 0; i < c_p_c; ++i) {
@@ -1354,7 +1354,8 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
                                     for (l = 0; l < dim; ++l, ++voffs) {
                                         vec[voffs]+=codebook.codevectors[coffs+l];  // FPMATH
 
-                                        AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d  \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs);
+                                        av_dlog(NULL, " pass %d offs: %d curr: %f change: %f cv offs.: %d  \n",
+                                                pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs);
                                     }
                                 }
                             } else if (vr_type == 2 && ch == 2 && (voffset & 1) == 0 && (dim & 1) == 0) { // most frequent case optimized
@@ -1381,7 +1382,10 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
                                         vec[voffs       ] += codebook.codevectors[coffs + l    ];  // FPMATH
                                         vec[voffs + vlen] += codebook.codevectors[coffs + l + 1];  // FPMATH
 
-                                        AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d  \n", pass, voffset / ch + (voffs % ch) * vlen, vec[voffset / ch + (voffs % ch) * vlen], codebook.codevectors[coffs + l], coffs, l);
+                                        av_dlog(NULL, " pass %d offs: %d curr: %f change: %f cv offs.: %d+%d  \n",
+                                                pass, voffset / ch + (voffs % ch) * vlen,
+                                                vec[voffset / ch + (voffs % ch) * vlen],
+                                                codebook.codevectors[coffs + l], coffs, l);
                                     }
                                 }
 
@@ -1393,7 +1397,10 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
                                     for (l = 0; l < dim; ++l, ++voffs) {
                                         vec[voffs / ch + (voffs % ch) * vlen] += codebook.codevectors[coffs + l];  // FPMATH FIXME use if and counter instead of / and %
 
-                                        AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d  \n", pass, voffset / ch + (voffs % ch) * vlen, vec[voffset / ch + (voffs % ch) * vlen], codebook.codevectors[coffs + l], coffs, l);
+                                        av_dlog(NULL, " pass %d offs: %d curr: %f change: %f cv offs.: %d+%d  \n",
+                                                pass, voffset / ch + (voffs % ch) * vlen,
+                                                vec[voffset / ch + (voffs % ch) * vlen],
+                                                codebook.codevectors[coffs + l], coffs, l);
                                     }
                                 }
                             }
@@ -1481,8 +1488,8 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
     vc->mode_number = mode_number;
     mapping = &vc->mappings[vc->modes[mode_number].mapping];
 
-    AV_DEBUG(" Mode number: %u , mapping: %d , blocktype %d\n", mode_number,
-             vc->modes[mode_number].mapping, vc->modes[mode_number].blockflag);
+    av_dlog(NULL, " Mode number: %u , mapping: %d , blocktype %d\n", mode_number,
+            vc->modes[mode_number].mapping, vc->modes[mode_number].blockflag);
 
     blockflag = vc->modes[mode_number].blockflag;
     blocksize = vc->blocksize[blockflag];
@@ -1611,7 +1618,7 @@ static int vorbis_decode_frame(AVCodecContext *avccontext,
     if (!buf_size)
         return 0;
 
-    AV_DEBUG("packet length %d \n", buf_size);
+    av_dlog(NULL, "packet length %d \n", buf_size);
 
     init_get_bits(gb, buf, buf_size*8);
 
@@ -1628,7 +1635,8 @@ static int vorbis_decode_frame(AVCodecContext *avccontext,
         return buf_size ;
     }
 
-    AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len);
+    av_dlog(NULL, "parsed %d bytes %d bits, returned %d samples (*ch*bits) \n",
+            get_bits_count(gb) / 8, get_bits_count(gb) % 8, len);
 
     if (vc->audio_channels > 8) {
         for (i = 0; i < vc->audio_channels; i++)
diff --git a/libavcodec/vqavideo.c b/libavcodec/vqavideo.c
index a842ac7d70..caffddbfa4 100644
--- a/libavcodec/vqavideo.c
+++ b/libavcodec/vqavideo.c
@@ -90,14 +90,6 @@
 #define CPLZ_TAG MKBETAG('C', 'P', 'L', 'Z')
 #define VPTZ_TAG MKBETAG('V', 'P', 'T', 'Z')
 
-#define VQA_DEBUG 0
-
-#if VQA_DEBUG
-#define vqa_debug printf
-#else
-static inline void vqa_debug(const char *format, ...) { }
-#endif
-
 typedef struct VqaContext {
 
     AVCodecContext *avctx;
@@ -212,7 +204,7 @@ static void decode_format80(const unsigned char *src, int src_size,
 
     while (src_index < src_size) {
 
-        vqa_debug("      opcode %02X: ", src[src_index]);
+        av_dlog(NULL, "      opcode %02X: ", src[src_index]);
 
         /* 0x80 means that frame is finished */
         if (src[src_index] == 0x80)
@@ -231,7 +223,7 @@ static void decode_format80(const unsigned char *src, int src_size,
             src_index += 2;
             src_pos = AV_RL16(&src[src_index]);
             src_index += 2;
-            vqa_debug("(1) copy %X bytes from absolute pos %X\n", count, src_pos);
+            av_dlog(NULL, "(1) copy %X bytes from absolute pos %X\n", count, src_pos);
             CHECK_COUNT();
             for (i = 0; i < count; i++)
                 dest[dest_index + i] = dest[src_pos + i];
@@ -243,7 +235,7 @@ static void decode_format80(const unsigned char *src, int src_size,
             count = AV_RL16(&src[src_index]);
             src_index += 2;
             color = src[src_index++];
-            vqa_debug("(2) set %X bytes to %02X\n", count, color);
+            av_dlog(NULL, "(2) set %X bytes to %02X\n", count, color);
             CHECK_COUNT();
             memset(&dest[dest_index], color, count);
             dest_index += count;
@@ -253,7 +245,7 @@ static void decode_format80(const unsigned char *src, int src_size,
             count = (src[src_index++] & 0x3F) + 3;
             src_pos = AV_RL16(&src[src_index]);
             src_index += 2;
-            vqa_debug("(3) copy %X bytes from absolute pos %X\n", count, src_pos);
+            av_dlog(NULL, "(3) copy %X bytes from absolute pos %X\n", count, src_pos);
             CHECK_COUNT();
             for (i = 0; i < count; i++)
                 dest[dest_index + i] = dest[src_pos + i];
@@ -262,7 +254,7 @@ static void decode_format80(const unsigned char *src, int src_size,
         } else if (src[src_index] > 0x80) {
 
             count = src[src_index++] & 0x3F;
-            vqa_debug("(4) copy %X bytes from source to dest\n", count);
+            av_dlog(NULL, "(4) copy %X bytes from source to dest\n", count);
             CHECK_COUNT();
             memcpy(&dest[dest_index], &src[src_index], count);
             src_index += count;
@@ -273,7 +265,7 @@ static void decode_format80(const unsigned char *src, int src_size,
             count = ((src[src_index] & 0x70) >> 4) + 3;
             src_pos = AV_RB16(&src[src_index]) & 0x0FFF;
             src_index += 2;
-            vqa_debug("(5) copy %X bytes from relpos %X\n", count, src_pos);
+            av_dlog(NULL, "(5) copy %X bytes from relpos %X\n", count, src_pos);
             CHECK_COUNT();
             for (i = 0; i < count; i++)
                 dest[dest_index + i] = dest[dest_index - src_pos + i];
diff --git a/libavformat/ipmovie.c b/libavformat/ipmovie.c
index e3215d3b79..e8ba0643f6 100644
--- a/libavformat/ipmovie.c
+++ b/libavformat/ipmovie.c
@@ -35,17 +35,6 @@
 #include "libavutil/intreadwrite.h"
 #include "avformat.h"
 
-/* debugging support: #define DEBUG_IPMOVIE as non-zero to see extremely
- * verbose information about the demux process */
-#define DEBUG_IPMOVIE 0
-
-#if DEBUG_IPMOVIE
-#undef printf
-#define debug_ipmovie printf
-#else
-static inline void debug_ipmovie(const char *format, ...) { }
-#endif
-
 #define CHUNK_PREAMBLE_SIZE 4
 #define OPCODE_PREAMBLE_SIZE 4
 
@@ -150,8 +139,8 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext *pb,
             s->audio_frame_count +=
                 (s->audio_chunk_size - 6) / s->audio_channels;
 
-        debug_ipmovie("sending audio frame with pts %"PRId64" (%d audio frames)\n",
-            pkt->pts, s->audio_frame_count);
+        av_dlog(NULL, "sending audio frame with pts %"PRId64" (%d audio frames)\n",
+                pkt->pts, s->audio_frame_count);
 
         chunk_type = CHUNK_VIDEO;
 
@@ -195,8 +184,7 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext *pb,
         pkt->stream_index = s->video_stream_index;
         pkt->pts = s->video_pts;
 
-        debug_ipmovie("sending video frame with pts %"PRId64"\n",
-            pkt->pts);
+        av_dlog(NULL, "sending video frame with pts %"PRId64"\n", pkt->pts);
 
         s->video_pts += s->frame_pts_inc;
 
@@ -244,36 +232,36 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
     chunk_size = AV_RL16(&chunk_preamble[0]);
     chunk_type = AV_RL16(&chunk_preamble[2]);
 
-    debug_ipmovie("chunk type 0x%04X, 0x%04X bytes: ", chunk_type, chunk_size);
+    av_dlog(NULL, "chunk type 0x%04X, 0x%04X bytes: ", chunk_type, chunk_size);
 
     switch (chunk_type) {
 
     case CHUNK_INIT_AUDIO:
-        debug_ipmovie("initialize audio\n");
+        av_dlog(NULL, "initialize audio\n");
         break;
 
     case CHUNK_AUDIO_ONLY:
-        debug_ipmovie("audio only\n");
+        av_dlog(NULL, "audio only\n");
         break;
 
     case CHUNK_INIT_VIDEO:
-        debug_ipmovie("initialize video\n");
+        av_dlog(NULL, "initialize video\n");
         break;
 
     case CHUNK_VIDEO:
-        debug_ipmovie("video (and audio)\n");
+        av_dlog(NULL, "video (and audio)\n");
         break;
 
     case CHUNK_SHUTDOWN:
-        debug_ipmovie("shutdown\n");
+        av_dlog(NULL, "shutdown\n");
         break;
 
     case CHUNK_END:
-        debug_ipmovie("end\n");
+        av_dlog(NULL, "end\n");
         break;
 
     default:
-        debug_ipmovie("invalid chunk\n");
+        av_dlog(NULL, "invalid chunk\n");
         chunk_type = CHUNK_BAD;
         break;
 
@@ -299,29 +287,29 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
         chunk_size -= OPCODE_PREAMBLE_SIZE;
         chunk_size -= opcode_size;
         if (chunk_size < 0) {
-            debug_ipmovie("chunk_size countdown just went negative\n");
+            av_dlog(NULL, "chunk_size countdown just went negative\n");
             chunk_type = CHUNK_BAD;
             break;
         }
 
-        debug_ipmovie("  opcode type %02X, version %d, 0x%04X bytes: ",
-            opcode_type, opcode_version, opcode_size);
+        av_dlog(NULL, "  opcode type %02X, version %d, 0x%04X bytes: ",
+                opcode_type, opcode_version, opcode_size);
         switch (opcode_type) {
 
         case OPCODE_END_OF_STREAM:
-            debug_ipmovie("end of stream\n");
+            av_dlog(NULL, "end of stream\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_END_OF_CHUNK:
-            debug_ipmovie("end of chunk\n");
+            av_dlog(NULL, "end of chunk\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_CREATE_TIMER:
-            debug_ipmovie("create timer\n");
+            av_dlog(NULL, "create timer\n");
             if ((opcode_version > 0) || (opcode_size > 6)) {
-                debug_ipmovie("bad create_timer opcode\n");
+                av_dlog(NULL, "bad create_timer opcode\n");
                 chunk_type = CHUNK_BAD;
                 break;
             }
@@ -331,14 +319,15 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
                 break;
             }
             s->frame_pts_inc = ((uint64_t)AV_RL32(&scratch[0])) * AV_RL16(&scratch[4]);
-            debug_ipmovie("  %.2f frames/second (timer div = %d, subdiv = %d)\n",
-                1000000.0/s->frame_pts_inc, AV_RL32(&scratch[0]), AV_RL16(&scratch[4]));
+            av_dlog(NULL, "  %.2f frames/second (timer div = %d, subdiv = %d)\n",
+                    1000000.0 / s->frame_pts_inc, AV_RL32(&scratch[0]),
+                    AV_RL16(&scratch[4]));
             break;
 
         case OPCODE_INIT_AUDIO_BUFFERS:
-            debug_ipmovie("initialize audio buffers\n");
+            av_dlog(NULL, "initialize audio buffers\n");
             if ((opcode_version > 1) || (opcode_size > 10)) {
-                debug_ipmovie("bad init_audio_buffers opcode\n");
+                av_dlog(NULL, "bad init_audio_buffers opcode\n");
                 chunk_type = CHUNK_BAD;
                 break;
             }
@@ -360,23 +349,22 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
                 s->audio_type = CODEC_ID_PCM_S16LE;
             else
                 s->audio_type = CODEC_ID_PCM_U8;
-            debug_ipmovie("audio: %d bits, %d Hz, %s, %s format\n",
-                s->audio_bits,
-                s->audio_sample_rate,
-                (s->audio_channels == 2) ? "stereo" : "mono",
-                (s->audio_type == CODEC_ID_INTERPLAY_DPCM) ?
-                "Interplay audio" : "PCM");
+            av_dlog(NULL, "audio: %d bits, %d Hz, %s, %s format\n",
+                    s->audio_bits, s->audio_sample_rate,
+                    (s->audio_channels == 2) ? "stereo" : "mono",
+                    (s->audio_type == CODEC_ID_INTERPLAY_DPCM) ?
+                    "Interplay audio" : "PCM");
             break;
 
         case OPCODE_START_STOP_AUDIO:
-            debug_ipmovie("start/stop audio\n");
+            av_dlog(NULL, "start/stop audio\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_INIT_VIDEO_BUFFERS:
-            debug_ipmovie("initialize video buffers\n");
+            av_dlog(NULL, "initialize video buffers\n");
             if ((opcode_version > 2) || (opcode_size > 8)) {
-                debug_ipmovie("bad init_video_buffers opcode\n");
+                av_dlog(NULL, "bad init_video_buffers opcode\n");
                 chunk_type = CHUNK_BAD;
                 break;
             }
@@ -392,8 +380,8 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             } else {
                 s->video_bpp = 16;
             }
-            debug_ipmovie("video resolution: %d x %d\n",
-                s->video_width, s->video_height);
+            av_dlog(NULL, "video resolution: %d x %d\n",
+                    s->video_width, s->video_height);
             break;
 
         case OPCODE_UNKNOWN_06:
@@ -403,17 +391,17 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
         case OPCODE_UNKNOWN_13:
         case OPCODE_UNKNOWN_14:
         case OPCODE_UNKNOWN_15:
-            debug_ipmovie("unknown (but documented) opcode %02X\n", opcode_type);
+            av_dlog(NULL, "unknown (but documented) opcode %02X\n", opcode_type);
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_SEND_BUFFER:
-            debug_ipmovie("send buffer\n");
+            av_dlog(NULL, "send buffer\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_AUDIO_FRAME:
-            debug_ipmovie("audio frame\n");
+            av_dlog(NULL, "audio frame\n");
 
             /* log position and move on for now */
             s->audio_chunk_offset = avio_tell(pb);
@@ -422,26 +410,26 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             break;
 
         case OPCODE_SILENCE_FRAME:
-            debug_ipmovie("silence frame\n");
+            av_dlog(NULL, "silence frame\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_INIT_VIDEO_MODE:
-            debug_ipmovie("initialize video mode\n");
+            av_dlog(NULL, "initialize video mode\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_CREATE_GRADIENT:
-            debug_ipmovie("create gradient\n");
+            av_dlog(NULL, "create gradient\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_SET_PALETTE:
-            debug_ipmovie("set palette\n");
+            av_dlog(NULL, "set palette\n");
             /* check for the logical maximum palette size
              * (3 * 256 + 4 bytes) */
             if (opcode_size > 0x304) {
-                debug_ipmovie("demux_ipmovie: set_palette opcode too large\n");
+                av_dlog(NULL, "demux_ipmovie: set_palette opcode too large\n");
                 chunk_type = CHUNK_BAD;
                 break;
             }
@@ -455,7 +443,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             last_color = first_color + AV_RL16(&scratch[2]) - 1;
             /* sanity check (since they are 16 bit values) */
             if ((first_color > 0xFF) || (last_color > 0xFF)) {
-                debug_ipmovie("demux_ipmovie: set_palette indexes out of range (%d -> %d)\n",
+                av_dlog(NULL, "demux_ipmovie: set_palette indexes out of range (%d -> %d)\n",
                     first_color, last_color);
                 chunk_type = CHUNK_BAD;
                 break;
@@ -473,12 +461,12 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             break;
 
         case OPCODE_SET_PALETTE_COMPRESSED:
-            debug_ipmovie("set palette compressed\n");
+            av_dlog(NULL, "set palette compressed\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_SET_DECODING_MAP:
-            debug_ipmovie("set decoding map\n");
+            av_dlog(NULL, "set decoding map\n");
 
             /* log position and move on for now */
             s->decode_map_chunk_offset = avio_tell(pb);
@@ -487,7 +475,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             break;
 
         case OPCODE_VIDEO_DATA:
-            debug_ipmovie("set video data\n");
+            av_dlog(NULL, "set video data\n");
 
             /* log position and move on for now */
             s->video_chunk_offset = avio_tell(pb);
@@ -496,7 +484,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             break;
 
         default:
-            debug_ipmovie("*** unknown opcode type\n");
+            av_dlog(NULL, "*** unknown opcode type\n");
             chunk_type = CHUNK_BAD;
             break;
 

From 2366462429d9cc7ed9715c037f204fcefeff8ea4 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 2 Jun 2011 22:55:51 +0200
Subject: [PATCH 531/830] Replace #ifdef + av_log() combinations by av_dlog().

---
 libavcodec/ivi_common.c |  4 ++--
 libavcodec/svq1dec.c    | 28 ++++++++++------------------
 libavcodec/vorbisdec.c  | 13 +++++--------
 libavdevice/dv1394.c    |  8 ++------
 4 files changed, 19 insertions(+), 34 deletions(-)

diff --git a/libavcodec/ivi_common.c b/libavcodec/ivi_common.c
index ac4c68e05f..2b684e8b9e 100644
--- a/libavcodec/ivi_common.c
+++ b/libavcodec/ivi_common.c
@@ -418,8 +418,8 @@ int ff_ivi_decode_blocks(GetBitContext *gb, IVIBandDesc *band, IVITile *tile)
                         break;
                     pos = band->scan[scan_pos];
 
-                    if (IVI_DEBUG && !val)
-                        av_log(NULL, AV_LOG_ERROR, "Val = 0 encountered!\n");
+                    if (!val)
+                        av_dlog(NULL, "Val = 0 encountered!\n");
 
                     q = (base_tab[pos] * quant) >> 9;
                     if (q > 1)
diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c
index ef3b4be16c..ef4cde1b7f 100644
--- a/libavcodec/svq1dec.c
+++ b/libavcodec/svq1dec.c
@@ -238,9 +238,9 @@ static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int
     }
 
     if ((stages > 0) && (level >= 4)) {
-#ifdef DEBUG_SVQ1
-    av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",stages,level);
-#endif
+      av_dlog(s->avctx,
+              "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",
+              stages, level);
       return -1;        /* invalid vector */
     }
 
@@ -288,9 +288,9 @@ static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels,
     if (stages == -1) continue; /* skip vector */
 
     if ((stages > 0) && (level >= 4)) {
-#ifdef DEBUG_SVQ1
-    av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",stages,level);
-#endif
+      av_dlog(s->avctx,
+              "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",
+              stages, level);
       return -1;        /* invalid vector */
     }
 
@@ -499,9 +499,7 @@ static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
 
     if (result != 0)
     {
-#ifdef DEBUG_SVQ1
-    av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_block %i\n",result);
-#endif
+      av_dlog(s->avctx, "Error in svq1_motion_inter_block %i\n", result);
       break;
     }
     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
@@ -512,9 +510,7 @@ static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
 
     if (result != 0)
     {
-#ifdef DEBUG_SVQ1
-    av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_4v_block %i\n",result);
-#endif
+      av_dlog(s->avctx, "Error in svq1_motion_inter_4v_block %i\n", result);
       break;
     }
     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
@@ -660,9 +656,7 @@ static int svq1_decode_frame(AVCodecContext *avctx,
 
   if (result != 0)
   {
-#ifdef DEBUG_SVQ1
-    av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_frame_header %i\n",result);
-#endif
+    av_dlog(s->avctx, "Error in svq1_decode_frame_header %i\n",result);
     return result;
   }
 
@@ -729,9 +723,7 @@ static int svq1_decode_frame(AVCodecContext *avctx,
                                             linesize, pmv, x, y);
           if (result != 0)
           {
-#ifdef DEBUG_SVQ1
-    av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_delta_block %i\n",result);
-#endif
+            av_dlog(s->avctx, "Error in svq1_decode_delta_block %i\n",result);
             goto err;
           }
         }
diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index c758a440ed..0c1850c814 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -370,9 +370,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
                     float last = 0.0;
                     unsigned lookup_offset = i;
 
-#ifdef V_DEBUG
-                    av_log(vc->avccontext, AV_LOG_INFO, "Lookup offset %u ,", i);
-#endif
+                    av_dlog(vc->avccontext, "Lookup offset %u ,", i);
 
                     for (k = 0; k < dim; ++k) {
                         unsigned multiplicand_offset = lookup_offset % codebook_lookup_values;
@@ -383,12 +381,11 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
                     }
                     tmp_vlc_bits[j] = tmp_vlc_bits[i];
 
-#ifdef V_DEBUG
-                    av_log(vc->avccontext, AV_LOG_INFO, "real lookup offset %u, vector: ", j);
+                    av_dlog(vc->avccontext, "real lookup offset %u, vector: ", j);
                     for (k = 0; k < dim; ++k)
-                        av_log(vc->avccontext, AV_LOG_INFO, " %f ", codebook_setup->codevectors[j * dim + k]);
-                    av_log(vc->avccontext, AV_LOG_INFO, "\n");
-#endif
+                        av_dlog(vc->avccontext, " %f ",
+                                codebook_setup->codevectors[j * dim + k]);
+                    av_dlog(vc->avccontext, "\n");
 
                     ++j;
                 }
diff --git a/libavdevice/dv1394.c b/libavdevice/dv1394.c
index c9b7a69d6f..f414eb3388 100644
--- a/libavdevice/dv1394.c
+++ b/libavdevice/dv1394.c
@@ -177,15 +177,13 @@ restart_poll:
             av_log(context, AV_LOG_ERROR, "Failed to get status: %s\n", strerror(errno));
             return AVERROR(EIO);
         }
-#ifdef DV1394_DEBUG
-        av_log(context, AV_LOG_DEBUG, "DV1394: status\n"
+        av_dlog(context, "DV1394: status\n"
                 "\tactive_frame\t%d\n"
                 "\tfirst_clear_frame\t%d\n"
                 "\tn_clear_frames\t%d\n"
                 "\tdropped_frames\t%d\n",
                 s.active_frame, s.first_clear_frame,
                 s.n_clear_frames, s.dropped_frames);
-#endif
 
         dv->avail = s.n_clear_frames;
         dv->index = s.first_clear_frame;
@@ -200,10 +198,8 @@ restart_poll:
         }
     }
 
-#ifdef DV1394_DEBUG
-    av_log(context, AV_LOG_DEBUG, "index %d, avail %d, done %d\n", dv->index, dv->avail,
+    av_dlog(context, "index %d, avail %d, done %d\n", dv->index, dv->avail,
             dv->done);
-#endif
 
     size = dv_produce_packet(dv->dv_demux, pkt,
                              dv->ring + (dv->index * DV1394_PAL_FRAME_SIZE),

From 0fc9c6554e85b601b61367574fec333fb5a074e6 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 2 Jun 2011 22:56:29 +0200
Subject: [PATCH 532/830] Remove pointless #ifdefs around function declarations
 in a header.

---
 libavcodec/ivi_common.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/ivi_common.h b/libavcodec/ivi_common.h
index 582a1d71fc..0a5e9b1384 100644
--- a/libavcodec/ivi_common.h
+++ b/libavcodec/ivi_common.h
@@ -340,7 +340,6 @@ void ff_ivi_process_empty_tile(AVCodecContext *avctx, IVIBandDesc *band,
  */
 void ff_ivi_output_plane(IVIPlaneDesc *plane, uint8_t *dst, int dst_pitch);
 
-#if IVI_DEBUG
 /**
  *  Calculate band checksum from band data.
  */
@@ -350,6 +349,5 @@ uint16_t ivi_calc_band_checksum (IVIBandDesc *band);
  *  Verify that band data lies in range.
  */
 int ivi_check_band (IVIBandDesc *band, const uint8_t *ref, int pitch);
-#endif
 
 #endif /* AVCODEC_IVI_COMMON_H */

From fb52cf8267984f97dcb18aaf5dcbad42dd6fcacd Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 2 Jun 2011 22:59:27 +0200
Subject: [PATCH 533/830] cook: Remove non-compiling debug output.

random_state was changed from an int to a struct and can no longer be printed
as easily as before.
---
 libavcodec/cook.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index dfba2c553b..e6e2508c84 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -1023,7 +1023,6 @@ static void dump_cook_context(COOKContext *q)
     PRINT("samples_per_channel",q->subpacket[0].samples_per_channel);
     PRINT("samples_per_frame",q->subpacket[0].samples_per_frame);
     PRINT("subbands",q->subpacket[0].subbands);
-    PRINT("random_state",q->random_state);
     PRINT("js_subband_start",q->subpacket[0].js_subband_start);
     PRINT("log2_numvector_size",q->subpacket[0].log2_numvector_size);
     PRINT("numvector_size",q->subpacket[0].numvector_size);

From ec6313ad866fc04b7b56af4d639182bf595d3829 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 2 Jun 2011 23:02:34 +0200
Subject: [PATCH 534/830] vorbis: Remove pointless DEBUG #ifdef around debug
 output macros.

---
 libavcodec/vorbisdec.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index 0c1850c814..ac64ae8c32 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -605,7 +605,6 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
             if (!floor_setup->data.t0.lsp)
                 return -1;
 
-#ifdef V_DEBUG /* debug output parsed headers */
             /* debug output parsed headers */
             av_dlog(NULL, "floor0 order: %u\n", floor_setup->data.t0.order);
             av_dlog(NULL, "floor0 rate: %u\n", floor_setup->data.t0.rate);
@@ -626,7 +625,6 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
                             floor_setup->data.t0.book_list[idx]);
                 }
             }
-#endif
         } else {
             av_log(vc->avccontext, AV_LOG_ERROR, "Invalid floor type!\n");
             return -1;
@@ -811,11 +809,9 @@ static void create_map(vorbis_context *vc, unsigned floor_number)
         vf->map_size[blockflag] = n;
     }
 
-#ifdef V_DEBUG
     for (idx = 0; idx <= n; ++idx) {
         av_dlog(NULL, "floor0 map: map at pos %d is %d\n", idx, map[idx]);
     }
-#endif
 }
 
 static int vorbis_parse_setup_hdr_modes(vorbis_context *vc)
@@ -1077,14 +1073,12 @@ static int vorbis_floor0_decode(vorbis_context *vc,
 
             lsp_len += codebook.dimensions;
         }
-#ifdef V_DEBUG
         /* DEBUG: output lsp coeffs */
         {
             int idx;
             for (idx = 0; idx < lsp_len; ++idx)
                 av_dlog(NULL, "floor0 dec: coeff at %d is %f\n", idx, lsp[idx]);
         }
-#endif
 
         /* synthesize floor output vector */
         {

From 068aa387128850da488af262d54986d03eebe423 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 2 Jun 2011 23:03:54 +0200
Subject: [PATCH 535/830] vorbis: Remove non-compiling debug statement.

---
 libavcodec/vorbisdec.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index ac64ae8c32..b969d45783 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -1089,9 +1089,6 @@ static int vorbis_floor0_decode(vorbis_context *vc,
             for (i = 0; i < order; i++)
                 lsp[i] = 2.0f * cos(lsp[i]);
 
-            AV_DEBUG("floor0 synth: map_size = %d; m = %d; wstep = %f\n",
-                     vf->map_size, order, wstep);
-
             i = 0;
             while (i < vf->map_size[blockflag]) {
                 int j, iter_cond = vf->map[blockflag][i];

From f190f676bc93a7e80344f2feeb3b9b44604d4717 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 2 Jun 2011 22:45:15 +0200
Subject: [PATCH 536/830] Replace custom DEBUG preprocessor trickery by the
 standard one.

---
 ffserver.c              | 4 ++--
 libavcodec/cook.c       | 5 ++---
 libavcodec/indeo5.c     | 2 +-
 libavcodec/ivi_common.c | 2 +-
 libavcodec/ivi_common.h | 2 --
 libavcodec/svq1dec.c    | 3 ---
 libavcodec/vorbis.c     | 9 +++------
 libavdevice/dv1394.c    | 3 ---
 libavdevice/vfwcap.c    | 4 +---
 libavformat/ape.c       | 4 +---
 libavformat/apetag.c    | 2 --
 libavformat/rtsp.c      | 3 ---
 12 files changed, 11 insertions(+), 32 deletions(-)

diff --git a/ffserver.c b/ffserver.c
index 56bcba58f7..30edb05969 100644
--- a/ffserver.c
+++ b/ffserver.c
@@ -1760,7 +1760,7 @@ static int http_parse_request(HTTPContext *c)
                 }
             }
 
-#ifdef DEBUG_WMP
+#ifdef DEBUG
             http_log("\nGot request:\n%s\n", c->buffer);
 #endif
 
@@ -1790,7 +1790,7 @@ static int http_parse_request(HTTPContext *c)
         return 0;
     }
 
-#ifdef DEBUG_WMP
+#ifdef DEBUG
     if (strcmp(stream->filename + strlen(stream->filename) - 4, ".asf") == 0)
         http_log("\nGot request:\n%s\n", c->buffer);
 #endif
diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index e6e2508c84..84211a6d08 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -66,7 +66,6 @@
 
 #define SUBBAND_SIZE    20
 #define MAX_SUBPACKETS   5
-//#define COOKDEBUG
 
 typedef struct {
     int *now;
@@ -1005,7 +1004,7 @@ static int cook_decode_frame(AVCodecContext *avctx,
     return avctx->block_align;
 }
 
-#ifdef COOKDEBUG
+#ifdef DEBUG
 static void dump_cook_context(COOKContext *q)
 {
     //int i=0;
@@ -1247,7 +1246,7 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
     else
         avctx->channel_layout = (avctx->channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
 
-#ifdef COOKDEBUG
+#ifdef DEBUG
     dump_cook_context(q);
 #endif
     return 0;
diff --git a/libavcodec/indeo5.c b/libavcodec/indeo5.c
index 088b45a263..ba1bc18eff 100644
--- a/libavcodec/indeo5.c
+++ b/libavcodec/indeo5.c
@@ -629,7 +629,7 @@ static int decode_band(IVI5DecContext *ctx, int plane_num,
         FFSWAP(int16_t, band->rv_map->valtab[idx1], band->rv_map->valtab[idx2]);
     }
 
-#if IVI_DEBUG
+#ifdef DEBUG
     if (band->checksum_present) {
         uint16_t chksum = ivi_calc_band_checksum(band);
         if (chksum != band->checksum) {
diff --git a/libavcodec/ivi_common.c b/libavcodec/ivi_common.c
index 2b684e8b9e..bd3d4e6fd4 100644
--- a/libavcodec/ivi_common.c
+++ b/libavcodec/ivi_common.c
@@ -563,7 +563,7 @@ void ff_ivi_process_empty_tile(AVCodecContext *avctx, IVIBandDesc *band,
 }
 
 
-#if IVI_DEBUG
+#ifdef DEBUG
 uint16_t ivi_calc_band_checksum (IVIBandDesc *band)
 {
     int         x, y;
diff --git a/libavcodec/ivi_common.h b/libavcodec/ivi_common.h
index 0a5e9b1384..fd3d82515a 100644
--- a/libavcodec/ivi_common.h
+++ b/libavcodec/ivi_common.h
@@ -33,8 +33,6 @@
 #include "get_bits.h"
 #include <stdint.h>
 
-#define IVI_DEBUG 0
-
 #define IVI_VLC_BITS 13 ///< max number of bits of the ivi's huffman codes
 
 /**
diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c
index ef4cde1b7f..fc36bc3489 100644
--- a/libavcodec/svq1dec.c
+++ b/libavcodec/svq1dec.c
@@ -33,7 +33,6 @@
  */
 
 
-//#define DEBUG_SVQ1
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -705,9 +704,7 @@ static int svq1_decode_frame(AVCodecContext *avctx,
           result = svq1_decode_block_intra (&s->gb, &current[x], linesize);
           if (result != 0)
           {
-//#ifdef DEBUG_SVQ1
             av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result);
-//#endif
             goto err;
           }
         }
diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c
index 7198abe38d..f413170b6a 100644
--- a/libavcodec/vorbis.c
+++ b/libavcodec/vorbis.c
@@ -20,9 +20,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#undef V_DEBUG
-//#define V_DEBUG
-
 #define ALT_BITSTREAM_READER_LE
 #include "avcodec.h"
 #include "get_bits.h"
@@ -57,7 +54,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num)
 
     unsigned i, j, p, code;
 
-#ifdef V_DEBUG
+#ifdef DEBUG
     GetBitContext gb;
 #endif
 
@@ -74,7 +71,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num)
     for (i = 0; i < bits[p]; ++i)
         exit_at_level[i+1] = 1 << i;
 
-#ifdef V_DEBUG
+#ifdef DEBUG
     av_log(NULL, AV_LOG_INFO, " %u. of %u code len %d code %d - ", p, num, bits[p], codes[p]);
     init_get_bits(&gb, (uint8_t *)&codes[p], bits[p]);
     for (i = 0; i < bits[p]; ++i)
@@ -102,7 +99,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num)
             exit_at_level[j] = code + (1 << (j - 1));
         codes[p] = code;
 
-#ifdef V_DEBUG
+#ifdef DEBUG
         av_log(NULL, AV_LOG_INFO, " %d. code len %d code %d - ", p, bits[p], codes[p]);
         init_get_bits(&gb, (uint8_t *)&codes[p], bits[p]);
         for (i = 0; i < bits[p]; ++i)
diff --git a/libavdevice/dv1394.c b/libavdevice/dv1394.c
index f414eb3388..565cf4cc4e 100644
--- a/libavdevice/dv1394.c
+++ b/libavdevice/dv1394.c
@@ -33,9 +33,6 @@
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #include "libavformat/avformat.h"
-
-#undef DV1394_DEBUG
-
 #include "libavformat/dv.h"
 #include "dv1394.h"
 
diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index c021831945..5dd873bdcb 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c
@@ -26,8 +26,6 @@
 #include <windows.h>
 #include <vfw.h>
 
-//#define DEBUG_VFW
-
 /* Defines for VFW missing from MinGW.
  * Remove this when MinGW incorporates them. */
 #define HWND_MESSAGE                ((HWND)-3)
@@ -122,7 +120,7 @@ static void dump_captureparms(AVFormatContext *s, CAPTUREPARMS *cparms)
 
 static void dump_videohdr(AVFormatContext *s, VIDEOHDR *vhdr)
 {
-#ifdef DEBUG_VFW
+#ifdef DEBUG
     av_log(s, AV_LOG_DEBUG, "VIDEOHDR\n");
     dstruct(s, vhdr, lpData, "p");
     dstruct(s, vhdr, dwBufferLength, "lu");
diff --git a/libavformat/ape.c b/libavformat/ape.c
index d7c1447b01..90b02619e0 100644
--- a/libavformat/ape.c
+++ b/libavformat/ape.c
@@ -26,8 +26,6 @@
 #include "avformat.h"
 #include "apetag.h"
 
-#define ENABLE_DEBUG 0
-
 /* The earliest and latest file formats supported by this library */
 #define APE_MIN_VERSION 3950
 #define APE_MAX_VERSION 3990
@@ -96,7 +94,7 @@ static int ape_probe(AVProbeData * p)
 
 static void ape_dumpinfo(AVFormatContext * s, APEContext * ape_ctx)
 {
-#if ENABLE_DEBUG
+#ifdef DEBUG
     int i;
 
     av_log(s, AV_LOG_DEBUG, "Descriptor Block:\n\n");
diff --git a/libavformat/apetag.c b/libavformat/apetag.c
index 6e925354af..2eb1673cc4 100644
--- a/libavformat/apetag.c
+++ b/libavformat/apetag.c
@@ -24,8 +24,6 @@
 #include "avformat.h"
 #include "apetag.h"
 
-#define ENABLE_DEBUG 0
-
 #define APE_TAG_VERSION               2000
 #define APE_TAG_FOOTER_BYTES          32
 #define APE_TAG_FLAG_CONTAINS_HEADER  (1 << 31)
diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index 2d1438d3ed..c78b762a36 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -45,7 +45,6 @@
 #include "url.h"
 
 //#define DEBUG
-//#define DEBUG_RTP_TCP
 
 /* Timeout values for socket poll, in ms,
  * and read_packet(), in seconds  */
@@ -860,9 +859,7 @@ int ff_rtsp_read_reply(AVFormatContext *s, RTSPMessageHeader *reply,
         q = buf;
         for (;;) {
             ret = ffurl_read_complete(rt->rtsp_hd, &ch, 1);
-#ifdef DEBUG_RTP_TCP
             av_dlog(s, "ret=%d c=%02x [%c]\n", ret, ch, ch);
-#endif
             if (ret != 1)
                 return AVERROR_EOF;
             if (ch == '\n')

From 5aa70309ad236cf7e218cdce913d8290aae04d4a Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Fri, 3 Jun 2011 00:12:43 +0200
Subject: [PATCH 537/830] In libswscale, use all lines when converting from
 422p to rgb with mmx, improve quality.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/x86/yuv2rgb_template.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c
index 93087ebd6d..926e3fb9c4 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -43,17 +43,14 @@
     if (h_size * depth > FFABS(dstStride[0]))                        \
         h_size -= 8;                                                 \
                                                                      \
-    if (c->srcFormat == PIX_FMT_YUV422P) {                           \
-        srcStride[1] *= 2;                                           \
-        srcStride[2] *= 2;                                           \
-    }                                                                \
+    vshift = c->srcFormat != PIX_FMT_YUV422P;                        \
                                                                      \
     __asm__ volatile ("pxor %mm4, %mm4\n\t");                        \
     for (y = 0; y < srcSliceH; y++) {                                \
         uint8_t *image    = dst[0] + (y + srcSliceY) * dstStride[0]; \
         const uint8_t *py = src[0] +               y * srcStride[0]; \
-        const uint8_t *pu = src[1] +        (y >> 1) * srcStride[1]; \
-        const uint8_t *pv = src[2] +        (y >> 1) * srcStride[2]; \
+        const uint8_t *pu = src[1] +   (y >> vshift) * srcStride[1]; \
+        const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
         x86_reg index = -h_size / 2;                                 \
 
 #define YUV2RGB_INITIAL_LOAD          \
@@ -188,7 +185,7 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(2)
 
@@ -216,7 +213,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(2)
 
@@ -306,7 +303,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(3)
 
@@ -324,7 +321,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(3)
 
@@ -368,7 +365,7 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(4)
 
@@ -389,7 +386,7 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
                                         int srcSliceY, int srcSliceH,
                                         uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(4)
 
@@ -411,7 +408,7 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(4)
 
@@ -432,7 +429,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
                                         int srcSliceY, int srcSliceH,
                                         uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(4)
 

From 1212d5b5e45675701071eed47dbad21ee1a61d6f Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Fri, 3 Jun 2011 00:28:52 +0200
Subject: [PATCH 538/830] ffmpeg: In print_report, use int64_t for pts to check
 for 0 and avoid inf value for bitrate.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffmpeg.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index e962f1562a..0f32266977 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1379,7 +1379,8 @@ static void print_report(AVFormatContext **output_files,
     int64_t total_size;
     AVCodecContext *enc;
     int frame_number, vid, i;
-    double bitrate, ti1, pts;
+    double bitrate;
+    int64_t pts = INT64_MAX;
     static int64_t last_time = -1;
     static int qp_histogram[52];
 
@@ -1404,7 +1405,6 @@ static void print_report(AVFormatContext **output_files,
         total_size= avio_tell(oc->pb);
 
     buf[0] = '\0';
-    ti1 = 1e10;
     vid = 0;
     for(i=0;i<nb_ostreams;i++) {
         float q = -1;
@@ -1455,19 +1455,16 @@ static void print_report(AVFormatContext **output_files,
             vid = 1;
         }
         /* compute min output value */
-        pts = (double)ost->st->pts.val * av_q2d(ost->st->time_base);
-        if ((pts < ti1) && (pts > 0))
-            ti1 = pts;
+        pts = FFMIN(pts, av_rescale_q(ost->st->pts.val,
+                                      ost->st->time_base, AV_TIME_BASE_Q));
     }
-    if (ti1 < 0.01)
-        ti1 = 0.01;
 
     if (verbose > 0 || is_last_report) {
-        bitrate = (double)(total_size * 8) / ti1 / 1000.0;
+        bitrate = pts ? total_size * 8 / (pts / 1000.0) : 0;
 
         snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
             "size=%8.0fkB time=%0.2f bitrate=%6.1fkbits/s",
-            (double)total_size / 1024, ti1, bitrate);
+            (double)total_size / 1024, pts/(double)AV_TIME_BASE, bitrate);
 
         if (nb_frames_dup || nb_frames_drop)
           snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d",

From dd471070215c5ca78f2cb99efc66ea5e33b39808 Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Fri, 3 Jun 2011 00:29:36 +0200
Subject: [PATCH 539/830] In print_report, print progression time in
 hours:mins:secs.centisecs

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffmpeg.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 0f32266977..a78adf37c5 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1460,11 +1460,23 @@ static void print_report(AVFormatContext **output_files,
     }
 
     if (verbose > 0 || is_last_report) {
+        int hours, mins, secs, us;
+        secs = pts / AV_TIME_BASE;
+        us = pts % AV_TIME_BASE;
+        mins = secs / 60;
+        secs %= 60;
+        hours = mins / 60;
+        mins %= 60;
+
         bitrate = pts ? total_size * 8 / (pts / 1000.0) : 0;
 
         snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
-            "size=%8.0fkB time=%0.2f bitrate=%6.1fkbits/s",
-            (double)total_size / 1024, pts/(double)AV_TIME_BASE, bitrate);
+                 "size=%8.0fkB time=", total_size / 1024.0);
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
+                 "%02d:%02d:%02d.%02d ", hours, mins, secs,
+                 (100 * us) / AV_TIME_BASE);
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
+                 "bitrate=%6.1fkbits/s", bitrate);
 
         if (nb_frames_dup || nb_frames_drop)
           snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d",

From 9034001b17077e9da5205c4344eb1b88b9882f03 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 3 Jun 2011 03:43:46 +0200
Subject: [PATCH 540/830] h264 fill_filter_caches: Don't init chroma nnz_cache.

Found-by: Dark Shikari
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/h264.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 02152ad621..d40ce0d049 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2426,11 +2426,10 @@ static int fill_filter_caches(H264Context *h, int mb_type){
     if(IS_INTRA(mb_type))
         return 0;
 
-    AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
-    AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
-    AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
+    AV_COPY32(&h->non_zero_count_cache[4+8*1], &h->non_zero_count[mb_xy][ 4]);
+    AV_COPY32(&h->non_zero_count_cache[4+8*2], &h->non_zero_count[mb_xy][12]);
     AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
-    AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
+    AV_COPY32(&h->non_zero_count_cache[4+8*4], &h->non_zero_count[mb_xy][28]);
 
     h->cbp= h->cbp_table[mb_xy];
 

From 0d793db1b0a4edff37fee8318f5656ffe6fe3951 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 19:19:12 -0700
Subject: [PATCH 541/830] swscale: remove unused COMPILE_TEMPLATE_ALTIVEC.

---
 libswscale/ppc/swscale_template.c | 4 ----
 libswscale/swscale.c              | 3 ---
 2 files changed, 7 deletions(-)

diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
index 3ee100a57c..01afe43bb2 100644
--- a/libswscale/ppc/swscale_template.c
+++ b/libswscale/ppc/swscale_template.c
@@ -19,11 +19,8 @@
  */
 
 
-#if COMPILE_TEMPLATE_ALTIVEC
 #include "swscale_altivec_template.c"
-#endif
 
-#if COMPILE_TEMPLATE_ALTIVEC
 static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                     const int16_t **lumSrc, int lumFilterSize,
                                     const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -61,7 +58,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                        chrFilter, chrUSrc, chrVSrc, chrFilterSize,
                        alpSrc, dest, dstW, dstY);
 }
-#endif
 
 static void RENAME(sws_init_swScale)(SwsContext *c)
 {
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 02cf7cb382..d6bdfdd3cc 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1186,14 +1186,11 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, int width, uint
 //Plain C versions
 
 #define COMPILE_TEMPLATE_MMX2 0
-#define COMPILE_TEMPLATE_ALTIVEC 0
 
 #include "swscale_template.c"
 
 #if HAVE_ALTIVEC
 #undef RENAME
-#undef COMPILE_TEMPLATE_ALTIVEC
-#define COMPILE_TEMPLATE_ALTIVEC 1
 #define RENAME(a) a ## _altivec
 #include "ppc/swscale_template.c"
 #endif

From bd7c1d35a603dd07818d648c08839844bab902ce Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 3 Jun 2011 07:36:14 +0200
Subject: [PATCH 542/830] bktr: get default framerate from video standard.

---
 libavdevice/bktr.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index 8e3b271983..7cae48f5fd 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -256,10 +256,32 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         goto out;
     }
 
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->standard) {
+        if (!strcasecmp(ap->standard, "pal"))
+            s->standard = PAL;
+        else if (!strcasecmp(ap->standard, "secam"))
+            s->standard = SECAM;
+        else if (!strcasecmp(ap->standard, "ntsc"))
+            s->standard = NTSC;
+    }
+#endif
+
     if ((ret = av_parse_video_size(&width, &height, s->video_size)) < 0) {
         av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
         goto out;
     }
+
+    if (!s->framerate)
+        switch (s->standard) {
+        case PAL:   s->framerate = av_strdup("pal");  break;
+        case NTSC:  s->framerate = av_strdup("ntsc"); break;
+        case SECAM: s->framerate = av_strdup("25");   break;
+        default:
+            av_log(s1, AV_LOG_ERROR, "Unknown standard.\n");
+            ret = AVERROR(EINVAL);
+            goto out;
+        }
     if ((ret = av_parse_video_rate(&fps, s->framerate)) < 0) {
         av_log(s1, AV_LOG_ERROR, "Couldn't parse framerate.\n");
         goto out;
@@ -292,16 +314,6 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     st->codec->time_base.den = fps.num;
     st->codec->time_base.num = fps.den;
 
-#if FF_API_FORMAT_PARAMETERS
-    if (ap->standard) {
-        if (!strcasecmp(ap->standard, "pal"))
-            s->standard = PAL;
-        else if (!strcasecmp(ap->standard, "secam"))
-            s->standard = SECAM;
-        else if (!strcasecmp(ap->standard, "ntsc"))
-            s->standard = NTSC;
-    }
-#endif
 
     if (bktr_init(s1->filename, width, height, s->standard,
             &(s->video_fd), &(s->tuner_fd), -1, 0.0) < 0) {
@@ -347,7 +359,7 @@ static const AVOption options[] = {
     { "PALM",     "", 0, FF_OPT_TYPE_CONST, {.dbl = PALM},  0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
     { "NTSCJ",    "", 0, FF_OPT_TYPE_CONST, {.dbl = NTSCJ}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" },
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "vga"}, 0, 0, DEC },
-    { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "ntsc"}, 0, 0, DEC },
+    { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { NULL },
 };
 

From a606aa7570ef71c33569d64cda1b42743eb90682 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 3 Jun 2011 07:37:44 +0200
Subject: [PATCH 543/830] rawdec: don't leak option strings.

---
 libavformat/rawdec.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index 4f974d7045..87583e1cd9 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -97,7 +97,6 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
             st->codec->width  = width;
             st->codec->height = height;
             st->codec->pix_fmt = pix_fmt;
-            break;
 fail:
             av_freep(&s1->video_size);
             av_freep(&s1->pixel_format);

From c1dfb72d3563b0e3b7079c9be2d402ab0e48d3f2 Mon Sep 17 00:00:00 2001
From: Vladimir Pantelic <vladoman@gmail.com>
Date: Fri, 3 Jun 2011 11:00:15 +0200
Subject: [PATCH 544/830] Remove unused get_psnr() prototype

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/dsputil.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 74230cadbb..cfc574aebb 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -687,11 +687,6 @@ void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
 #   define LOCAL_ALIGNED_16(t, v, ...) LOCAL_ALIGNED(16, t, v, __VA_ARGS__)
 #endif
 
-/* PSNR */
-void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
-              int orig_linesize[3], int coded_linesize,
-              AVCodecContext *avctx);
-
 #define WRAPPER8_16(name8, name16)\
 static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
     return name8(s, dst           , src           , stride, h)\

From 1f94c31f69e6eb7eee80d0a3b42875261a18dcbe Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 3 Jun 2011 13:40:54 +0200
Subject: [PATCH 545/830] rawdec: initialize return value to 0.

---
 libavformat/rawdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index 87583e1cd9..3d8125f42c 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -70,7 +70,7 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
             }
         case AVMEDIA_TYPE_VIDEO: {
             FFRawVideoDemuxerContext *s1 = s->priv_data;
-            int width = 0, height = 0, ret;
+            int width = 0, height = 0, ret = 0;
             enum PixelFormat pix_fmt;
 
             if(ap->time_base.num)

From 8dfc6d1f7c33a482aba23e75870076f79b0ea485 Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Thu, 5 May 2011 14:18:33 +0200
Subject: [PATCH 546/830] svq3: Move svq3-specific fields to their own context.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/h264.h |  7 ----
 libavcodec/svq3.c | 97 ++++++++++++++++++++++++++++++-----------------
 2 files changed, 62 insertions(+), 42 deletions(-)

diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 5c275e2de0..8c4f1ab21a 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -594,13 +594,6 @@ typedef struct H264Context{
     // Timestamp stuff
     int sei_buffering_period_present;  ///< Buffering period SEI flag
     int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
-
-    //SVQ3 specific fields
-    int halfpel_flag;
-    int thirdpel_flag;
-    int unknown_svq3_flag;
-    int next_slice_index;
-    uint32_t svq3_watermark_key;
 }H264Context;
 
 
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index bc0215eff5..b55fa52fdb 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -63,6 +63,15 @@
  * svq3 decoder.
  */
 
+typedef struct {
+    H264Context h;
+    int halfpel_flag;
+    int thirdpel_flag;
+    int unknown_flag;
+    int next_slice_index;
+    uint32_t watermark_key;
+} SVQ3Context;
+
 #define FULLPEL_MODE  1
 #define HALFPEL_MODE  2
 #define THIRDPEL_MODE 3
@@ -423,8 +432,9 @@ static inline int svq3_mc_dir(H264Context *h, int size, int mode, int dir,
     return 0;
 }
 
-static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
+static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
 {
+    H264Context *h = &svq3->h;
     int i, j, k, m, dir, mode;
     int cbp = 0;
     uint32_t vlc;
@@ -456,9 +466,9 @@ static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
             mb_type = MB_TYPE_16x16;
         }
     } else if (mb_type < 8) {     /* INTER */
-        if (h->thirdpel_flag && h->halfpel_flag == !get_bits1 (&s->gb)) {
+        if (svq3->thirdpel_flag && svq3->halfpel_flag == !get_bits1 (&s->gb)) {
             mode = THIRDPEL_MODE;
-        } else if (h->halfpel_flag && h->thirdpel_flag == !get_bits1 (&s->gb)) {
+        } else if (svq3->halfpel_flag && svq3->thirdpel_flag == !get_bits1 (&s->gb)) {
             mode = HALFPEL_MODE;
         } else {
             mode = FULLPEL_MODE;
@@ -701,9 +711,11 @@ static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
     return 0;
 }
 
-static int svq3_decode_slice_header(H264Context *h)
+static int svq3_decode_slice_header(AVCodecContext *avctx)
 {
-    MpegEncContext *const s = (MpegEncContext *) h;
+    SVQ3Context *svq3 = avctx->priv_data;
+    H264Context *h = &svq3->h;
+    MpegEncContext *s = &h->s;
     const int mb_xy = h->mb_xy;
     int i, header;
 
@@ -711,24 +723,24 @@ static int svq3_decode_slice_header(H264Context *h)
 
     if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
         /* TODO: what? */
-        av_log(h->s.avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
+        av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
         return -1;
     } else {
         int length = (header >> 5) & 3;
 
-        h->next_slice_index = get_bits_count(&s->gb) + 8*show_bits(&s->gb, 8*length) + 8*length;
+        svq3->next_slice_index = get_bits_count(&s->gb) + 8*show_bits(&s->gb, 8*length) + 8*length;
 
-        if (h->next_slice_index > s->gb.size_in_bits) {
-            av_log(h->s.avctx, AV_LOG_ERROR, "slice after bitstream end\n");
+        if (svq3->next_slice_index > s->gb.size_in_bits) {
+            av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
             return -1;
     }
 
-        s->gb.size_in_bits = h->next_slice_index - 8*(length - 1);
+        s->gb.size_in_bits = svq3->next_slice_index - 8*(length - 1);
         skip_bits(&s->gb, 8);
 
-        if (h->svq3_watermark_key) {
+        if (svq3->watermark_key) {
             uint32_t header = AV_RL32(&s->gb.buffer[(get_bits_count(&s->gb)>>3)+1]);
-            AV_WL32(&s->gb.buffer[(get_bits_count(&s->gb)>>3)+1], header ^ h->svq3_watermark_key);
+            AV_WL32(&s->gb.buffer[(get_bits_count(&s->gb)>>3)+1], header ^ svq3->watermark_key);
         }
         if (length > 0) {
             memcpy((uint8_t *) &s->gb.buffer[get_bits_count(&s->gb) >> 3],
@@ -759,7 +771,7 @@ static int svq3_decode_slice_header(H264Context *h)
     /* unknown fields */
     skip_bits1(&s->gb);
 
-    if (h->unknown_svq3_flag) {
+    if (svq3->unknown_flag) {
         skip_bits1(&s->gb);
     }
 
@@ -788,8 +800,9 @@ static int svq3_decode_slice_header(H264Context *h)
 
 static av_cold int svq3_decode_init(AVCodecContext *avctx)
 {
-    MpegEncContext *const s = avctx->priv_data;
-    H264Context *const h = avctx->priv_data;
+    SVQ3Context *svq3 = avctx->priv_data;
+    H264Context *h = &svq3->h;
+    MpegEncContext *s = &h->s;
     int m;
     unsigned char *extradata;
     unsigned int size;
@@ -804,10 +817,11 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
     avctx->pix_fmt = avctx->codec->pix_fmts[0];
 
     if (!s->context_initialized) {
-        h->halfpel_flag      = 1;
-        h->thirdpel_flag     = 1;
-        h->unknown_svq3_flag = 0;
-        h->chroma_qp[0]      = h->chroma_qp[1] = 4;
+        h->chroma_qp[0] = h->chroma_qp[1] = 4;
+
+        svq3->halfpel_flag  = 1;
+        svq3->thirdpel_flag = 1;
+        svq3->unknown_flag  = 0;
 
         /* prowl for the "SEQH" marker in the extradata */
         extradata = (unsigned char *)avctx->extradata;
@@ -842,8 +856,8 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
                     break;
             }
 
-            h->halfpel_flag  = get_bits1(&gb);
-            h->thirdpel_flag = get_bits1(&gb);
+            svq3->halfpel_flag  = get_bits1(&gb);
+            svq3->thirdpel_flag = get_bits1(&gb);
 
             /* unknown fields */
             skip_bits1(&gb);
@@ -860,9 +874,9 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
                 skip_bits(&gb, 8);
             }
 
-            h->unknown_svq3_flag = get_bits1(&gb);
+            svq3->unknown_flag = get_bits1(&gb);
             avctx->has_b_frames = !s->low_delay;
-            if (h->unknown_svq3_flag) {
+            if (svq3->unknown_flag) {
 #if CONFIG_ZLIB
                 unsigned watermark_width  = svq3_get_ue_golomb(&gb);
                 unsigned watermark_height = svq3_get_ue_golomb(&gb);
@@ -885,9 +899,9 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
                     av_free(buf);
                     return -1;
                 }
-                h->svq3_watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
-                h->svq3_watermark_key = h->svq3_watermark_key << 16 | h->svq3_watermark_key;
-                av_log(avctx, AV_LOG_DEBUG, "watermark key %#x\n", h->svq3_watermark_key);
+                svq3->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
+                svq3->watermark_key = svq3->watermark_key << 16 | svq3->watermark_key;
+                av_log(avctx, AV_LOG_DEBUG, "watermark key %#x\n", svq3->watermark_key);
                 av_free(buf);
 #else
                 av_log(avctx, AV_LOG_ERROR, "this svq3 file contains watermark which need zlib support compiled in\n");
@@ -915,9 +929,10 @@ static int svq3_decode_frame(AVCodecContext *avctx,
                              AVPacket *avpkt)
 {
     const uint8_t *buf = avpkt->data;
+    SVQ3Context *svq3 = avctx->priv_data;
+    H264Context *h = &svq3->h;
+    MpegEncContext *s = &h->s;
     int buf_size = avpkt->size;
-    MpegEncContext *const s = avctx->priv_data;
-    H264Context *const h = avctx->priv_data;
     int m, mb_type;
 
     /* special case for last picture */
@@ -934,7 +949,7 @@ static int svq3_decode_frame(AVCodecContext *avctx,
 
     s->mb_x = s->mb_y = h->mb_xy = 0;
 
-    if (svq3_decode_slice_header(h))
+    if (svq3_decode_slice_header(avctx))
         return -1;
 
     s->pict_type = h->slice_type;
@@ -942,7 +957,7 @@ static int svq3_decode_frame(AVCodecContext *avctx,
 
     if (avctx->debug&FF_DEBUG_PICT_INFO){
         av_log(h->s.avctx, AV_LOG_DEBUG, "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
-               av_get_picture_type_char(s->pict_type), h->halfpel_flag, h->thirdpel_flag,
+               av_get_picture_type_char(s->pict_type), svq3->halfpel_flag, svq3->thirdpel_flag,
                s->adaptive_quant, s->qscale, h->slice_num);
     }
 
@@ -1006,10 +1021,10 @@ static int svq3_decode_frame(AVCodecContext *avctx,
             if ( (get_bits_count(&s->gb) + 7) >= s->gb.size_in_bits &&
                 ((get_bits_count(&s->gb) & 7) == 0 || show_bits(&s->gb, (-get_bits_count(&s->gb) & 7)) == 0)) {
 
-                skip_bits(&s->gb, h->next_slice_index - get_bits_count(&s->gb));
+                skip_bits(&s->gb, svq3->next_slice_index - get_bits_count(&s->gb));
                 s->gb.size_in_bits = 8*buf_size;
 
-                if (svq3_decode_slice_header(h))
+                if (svq3_decode_slice_header(avctx))
                     return -1;
 
                 /* TODO: support s->mb_skip_run */
@@ -1022,7 +1037,7 @@ static int svq3_decode_frame(AVCodecContext *avctx,
             } else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4) {
                 mb_type += 4;
             }
-            if (mb_type > 33 || svq3_decode_mb(h, mb_type)) {
+            if (mb_type > 33 || svq3_decode_mb(svq3, mb_type)) {
                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                 return -1;
             }
@@ -1056,15 +1071,27 @@ static int svq3_decode_frame(AVCodecContext *avctx,
     return buf_size;
 }
 
+static int svq3_decode_end(AVCodecContext *avctx)
+{
+    SVQ3Context *svq3 = avctx->priv_data;
+    H264Context *h = &svq3->h;
+    MpegEncContext *s = &h->s;
+
+    ff_h264_free_context(h);
+
+    MPV_common_end(s);
+
+    return 0;
+}
 
 AVCodec ff_svq3_decoder = {
     "svq3",
     AVMEDIA_TYPE_VIDEO,
     CODEC_ID_SVQ3,
-    sizeof(H264Context),
+    sizeof(SVQ3Context),
     svq3_decode_init,
     NULL,
-    ff_h264_decode_end,
+    svq3_decode_end,
     svq3_decode_frame,
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_DELAY,
     .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),

From 93d06bd1df73861c73bc12c659aecdf404d1de2a Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Wed, 4 May 2011 18:45:36 -0700
Subject: [PATCH 547/830] svq3: Check negative mb_type to fix potential crash.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/svq3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index b55fa52fdb..7cde5e5552 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -1037,7 +1037,7 @@ static int svq3_decode_frame(AVCodecContext *avctx,
             } else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4) {
                 mb_type += 4;
             }
-            if (mb_type > 33 || svq3_decode_mb(svq3, mb_type)) {
+            if ((unsigned)mb_type > 33 || svq3_decode_mb(svq3, mb_type)) {
                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                 return -1;
             }

From 94bed8e582eed1268ddc0d2b88cad21d8c638774 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 3 Jun 2011 11:42:07 +0200
Subject: [PATCH 548/830] Replace avcodec_get_pix_fmt_name() by
 av_get_pix_fmt_name().

This fixes warnings about avcodec_get_pix_fmt_name() being deprecated.
Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 ffmpeg.c                | 4 ++--
 libavcodec/dv.c         | 3 ++-
 libavcodec/targaenc.c   | 3 ++-
 libavcodec/utils.c      | 2 +-
 libavdevice/libdc1394.c | 2 +-
 5 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 15ac03e3ed..1c4c4b15db 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1171,8 +1171,8 @@ static void do_video_out(AVFormatContext *s,
         av_log(NULL, AV_LOG_INFO,
                "Input stream #%d.%d frame changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s\n",
                ist->file_index, ist->st->index,
-               ost->resample_width, ost->resample_height, avcodec_get_pix_fmt_name(ost->resample_pix_fmt),
-               dec->width         , dec->height         , avcodec_get_pix_fmt_name(dec->pix_fmt));
+               ost->resample_width, ost->resample_height, av_get_pix_fmt_name(ost->resample_pix_fmt),
+               dec->width         , dec->height         , av_get_pix_fmt_name(dec->pix_fmt));
         if(!ost->video_resample)
             ffmpeg_exit(1);
     }
diff --git a/libavcodec/dv.c b/libavcodec/dv.c
index 4ee4e5ef99..cd682ecb2b 100644
--- a/libavcodec/dv.c
+++ b/libavcodec/dv.c
@@ -38,6 +38,7 @@
  * DV codec.
  */
 #define ALT_BITSTREAM_READER
+#include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "dsputil.h"
 #include "get_bits.h"
@@ -350,7 +351,7 @@ static av_cold int dvvideo_init_encoder(AVCodecContext *avctx)
 {
     if (!ff_dv_codec_profile(avctx)) {
         av_log(avctx, AV_LOG_ERROR, "Found no DV profile for %ix%i %s video\n",
-               avctx->width, avctx->height, avcodec_get_pix_fmt_name(avctx->pix_fmt));
+               avctx->width, avctx->height, av_get_pix_fmt_name(avctx->pix_fmt));
         return -1;
     }
 
diff --git a/libavcodec/targaenc.c b/libavcodec/targaenc.c
index 828ab11c94..276bcc83eb 100644
--- a/libavcodec/targaenc.c
+++ b/libavcodec/targaenc.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "rle.h"
 #include "targa.h"
@@ -119,7 +120,7 @@ static int targa_encode_frame(AVCodecContext *avctx,
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "Pixel format '%s' not supported.\n",
-               avcodec_get_pix_fmt_name(avctx->pix_fmt));
+               av_get_pix_fmt_name(avctx->pix_fmt));
         return AVERROR(EINVAL);
     }
     bpp = outbuf[16] >> 3;
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 0eb5afd63c..65792a6f5a 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -933,7 +933,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
         if (enc->pix_fmt != PIX_FMT_NONE) {
             snprintf(buf + strlen(buf), buf_size - strlen(buf),
                      ", %s",
-                     avcodec_get_pix_fmt_name(enc->pix_fmt));
+                     av_get_pix_fmt_name(enc->pix_fmt));
         }
         if (enc->width) {
             snprintf(buf + strlen(buf), buf_size - strlen(buf),
diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index 2811e3b97d..b17d0fb441 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -162,7 +162,7 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
              break;
 
     if (!fps->frame_rate || !fmt->width) {
-        av_log(c, AV_LOG_ERROR, "Can't find matching camera format for %s, %dx%d@%d:1000fps\n", avcodec_get_pix_fmt_name(pix_fmt),
+        av_log(c, AV_LOG_ERROR, "Can't find matching camera format for %s, %dx%d@%d:1000fps\n", av_get_pix_fmt_name(pix_fmt),
                                                                                                 width, height, dc1394->frame_rate);
         ret = AVERROR(EINVAL);
         goto out;

From 5e1166b31be45b37cbbd14eecfa1b260190ac651 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 3 Jun 2011 12:52:29 +0100
Subject: [PATCH 549/830] Mark some variables with av_unused

Most of these variables are only used in av_dlog statements; some
are required by other macros that never actually use them.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/flacenc.c   |  2 +-
 libavcodec/wmaprodec.c |  2 +-
 libavformat/mov.c      |  2 +-
 libavformat/nsvdec.c   |  5 +++--
 libavformat/r3d.c      | 11 +++++++----
 libavformat/rtpdec.c   |  2 +-
 libavformat/utils.c    |  2 +-
 7 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 8624a6d987..be775cabd3 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -980,7 +980,7 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
 
 static int count_frame_header(FlacEncodeContext *s)
 {
-    uint8_t tmp;
+    uint8_t av_unused tmp;
     int count;
 
     /*
diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c
index ab2cd5c596..4ba8c455ab 100644
--- a/libavcodec/wmaprodec.c
+++ b/libavcodec/wmaprodec.c
@@ -1320,7 +1320,7 @@ static int decode_frame(WMAProDecodeCtx *s)
     /** no idea what these are for, might be the number of samples
         that need to be skipped at the beginning or end of a stream */
     if (get_bits1(gb)) {
-        int skip;
+        int av_unused skip;
 
         /** usually true for the first frame */
         if (get_bits1(gb)) {
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 8ec0d19ed2..6c3c95caa2 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -428,7 +428,7 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     AVStream *st;
     uint32_t type;
-    uint32_t ctype;
+    uint32_t av_unused ctype;
 
     if (c->fc->nb_streams < 1) // meta before first trak
         return 0;
diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index 8f55bbc841..037a6f166a 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -267,7 +267,8 @@ static int nsv_parse_NSVf_header(AVFormatContext *s, AVFormatParameters *ap)
 {
     NSVContext *nsv = s->priv_data;
     AVIOContext *pb = s->pb;
-    unsigned int file_size, size;
+    unsigned int av_unused file_size;
+    unsigned int size;
     int64_t duration;
     int strings_size;
     int table_entries;
@@ -546,7 +547,7 @@ static int nsv_read_chunk(AVFormatContext *s, int fill_header)
     uint32_t vsize;
     uint16_t asize;
     uint16_t auxsize;
-    uint32_t auxtag;
+    uint32_t av_unused auxtag;
 
     av_dlog(s, "%s(%d)\n", __FUNCTION__, fill_header);
 
diff --git a/libavformat/r3d.c b/libavformat/r3d.c
index 1b5dc1972a..148c6022bb 100644
--- a/libavformat/r3d.c
+++ b/libavformat/r3d.c
@@ -52,7 +52,8 @@ static int r3d_read_red1(AVFormatContext *s)
 {
     AVStream *st = av_new_stream(s, 0);
     char filename[258];
-    int tmp, tmp2;
+    int tmp;
+    int av_unused tmp2;
 
     if (!st)
         return AVERROR(ENOMEM);
@@ -139,7 +140,7 @@ static int r3d_read_rdvo(AVFormatContext *s, Atom *atom)
 static void r3d_read_reos(AVFormatContext *s)
 {
     R3DContext *r3d = s->priv_data;
-    int tmp;
+    int av_unused tmp;
 
     r3d->rdvo_offset = avio_rb32(s->pb);
     avio_rb32(s->pb); // rdvs offset
@@ -209,7 +210,8 @@ static int r3d_read_header(AVFormatContext *s, AVFormatParameters *ap)
 static int r3d_read_redv(AVFormatContext *s, AVPacket *pkt, Atom *atom)
 {
     AVStream *st = s->streams[0];
-    int tmp, tmp2;
+    int tmp;
+    int av_unused tmp2;
     uint64_t pos = avio_tell(s->pb);
     unsigned dts;
     int ret;
@@ -263,7 +265,8 @@ static int r3d_read_redv(AVFormatContext *s, AVPacket *pkt, Atom *atom)
 static int r3d_read_reda(AVFormatContext *s, AVPacket *pkt, Atom *atom)
 {
     AVStream *st = s->streams[1];
-    int tmp, tmp2, samples, size;
+    int av_unused tmp, tmp2;
+    int samples, size;
     uint64_t pos = avio_tell(s->pb);
     unsigned dts;
     int ret;
diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c
index 4623088c38..a910cf08bb 100644
--- a/libavformat/rtpdec.c
+++ b/libavformat/rtpdec.c
@@ -321,7 +321,7 @@ int rtp_check_and_send_back_rr(RTPDemuxContext *s, int count)
     avio_flush(pb);
     len = avio_close_dyn_buf(pb, &buf);
     if ((len > 0) && buf) {
-        int result;
+        int av_unused result;
         av_dlog(s->ic, "sending %d bytes of RR\n", len);
         result= ffurl_write(s->rtp_ctx, buf, len);
         av_dlog(s->ic, "result from ffurl_write: %d\n", result);
diff --git a/libavformat/utils.c b/libavformat/utils.c
index b844ab95bc..1050959dcb 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1982,7 +1982,7 @@ static void av_estimate_timings(AVFormatContext *ic, int64_t old_offset)
 
     {
         int i;
-        AVStream *st;
+        AVStream av_unused *st;
         for(i = 0;i < ic->nb_streams; i++) {
             st = ic->streams[i];
             av_dlog(ic, "%d: start_time: %0.3f duration: %0.3f\n", i,

From a5514a84748e90adbbfa2661ccea1e004e6ef604 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 30 May 2011 17:28:43 +0200
Subject: [PATCH 550/830] build: Simplify texi2html invocation through the
 --output option.

The --output option is available in texi2html since at least version 1.78.
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d18a2f97cf..f626332482 100644
--- a/Makefile
+++ b/Makefile
@@ -117,7 +117,7 @@ TEXIDEP = awk '/^@include/ { printf "$@: $(@D)/%s\n", $$2 }' <$< >$(@:%=%.d)
 doc/%.html: TAG = HTML
 doc/%.html: doc/%.texi $(SRC_PATH_BARE)/doc/t2h.init
 	$(Q)$(TEXIDEP)
-	$(M)cd doc && texi2html -monolithic --init-file $(SRC_PATH_BARE)/doc/t2h.init $(<:doc/%=%)
+	$(M)texi2html -monolithic --init-file $(SRC_PATH_BARE)/doc/t2h.init --output $@ $<
 
 doc/%.pod: TAG = POD
 doc/%.pod: doc/%.texi

From 43b6c3eb182617ff08e72584e1c4635ac20d33e8 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Fri, 27 May 2011 20:34:01 +0200
Subject: [PATCH 551/830] mpegts: Wrap #ifdef DEBUG and av_hex_dump_log()
 combination in a macro.

---
 libavformat/internal.h |  6 ++++++
 libavformat/mpegts.c   | 13 ++++---------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/libavformat/internal.h b/libavformat/internal.h
index 6f1305ce25..ad3d9c91c2 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -26,6 +26,12 @@
 
 #define MAX_URL_SIZE 4096
 
+#ifdef DEBUG
+#    define hex_dump_debug(class, buf, size) av_hex_dump_log(class, AV_LOG_DEBUG, buf, size)
+#else
+#    define hex_dump_debug(class, buf, size)
+#endif
+
 typedef struct AVCodecTag {
     enum CodecID id;
     unsigned int tag;
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 33675f470f..f675bf03fb 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -1014,10 +1014,8 @@ static void pmt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
     int mp4_dec_config_descr_len = 0;
     int mp4_es_id = 0;
 
-#ifdef DEBUG
     av_dlog(ts->stream, "PMT: len %i\n", section_len);
-    av_hex_dump_log(ts->stream, AV_LOG_DEBUG, (uint8_t *)section, section_len);
-#endif
+    hex_dump_debug(ts->stream, (uint8_t *)section, section_len);
 
     p_end = section + section_len - 4;
     p = section;
@@ -1134,10 +1132,9 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
     const uint8_t *p, *p_end;
     int sid, pmt_pid;
 
-#ifdef DEBUG
     av_dlog(ts->stream, "PAT:\n");
-    av_hex_dump_log(ts->stream, AV_LOG_DEBUG, (uint8_t *)section, section_len);
-#endif
+    hex_dump_debug(ts->stream, (uint8_t *)section, section_len);
+
     p_end = section + section_len - 4;
     p = section;
     if (parse_section_header(h, &p, p_end) < 0)
@@ -1178,10 +1175,8 @@ static void sdt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
     int onid, val, sid, desc_list_len, desc_tag, desc_len, service_type;
     char *name, *provider_name;
 
-#ifdef DEBUG
     av_dlog(ts->stream, "SDT:\n");
-    av_hex_dump_log(ts->stream, AV_LOG_DEBUG, (uint8_t *)section, section_len);
-#endif
+    hex_dump_debug(ts->stream, (uint8_t *)section, section_len);
 
     p_end = section + section_len - 4;
     p = section;

From e472f0ea5a025b61b7c208661a4bc31091a3c2c2 Mon Sep 17 00:00:00 2001
From: Philip de Nier <philipn@rd.bbc.co.uk>
Date: Thu, 2 Jun 2011 17:04:42 +0100
Subject: [PATCH 552/830] wav: fix skip size at end of ds64 chunk

---
 libavformat/wav.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index b475d0cb49..34b957121e 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -334,7 +334,7 @@ static int wav_read_header(AVFormatContext *s,
                    data_size, sample_count);
             return AVERROR_INVALIDDATA;
         }
-        avio_skip(pb, size - 16); /* skip rest of ds64 chunk */
+        avio_skip(pb, size - 24); /* skip rest of ds64 chunk */
     }
 
     for (;;) {

From c9614bb22c98c513c010e1e14b12349a8cc74d8c Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 3 Jun 2011 15:37:44 +0200
Subject: [PATCH 553/830] wav: update size check for ds64

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/wav.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index 34b957121e..5c4b7d3a6b 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -323,7 +323,7 @@ static int wav_read_header(AVFormatContext *s,
         if (avio_rl32(pb) != MKTAG('d', 's', '6', '4'))
             return -1;
         size = avio_rl32(pb);
-        if (size < 16)
+        if (size < 24)
             return -1;
         avio_rl64(pb); /* RIFF size */
         data_size = avio_rl64(pb);

From 25c32d082b1ab4c3567c09f707e75c8ac582806c Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Fri, 3 Jun 2011 13:30:27 +0200
Subject: [PATCH 554/830] cmdutils: move "#undef main" from ffplay.c to
 cmdutils.h

On Windows/MinGW the SDL cflags redefine main() through a macro, which
results in a linking error unless that macro is removed with #undef.

Since the addition of the SDL output device, SDL cflags are used also
for compiling ffmpeg and ffprobe, so we need to move this trick from
ffplay.c to a common header.

Fix trac issue #256.
---
 cmdutils.h | 4 ++++
 ffplay.c   | 6 +-----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cmdutils.h b/cmdutils.h
index eea44018b6..eca98a3cf4 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -29,6 +29,10 @@
 #include "libavformat/avformat.h"
 #include "libswscale/swscale.h"
 
+#ifdef __MINGW32__
+#undef main /* We don't want SDL to override our main() */
+#endif
+
 /**
  * program name, defined by the program for show_version().
  */
diff --git a/ffplay.c b/ffplay.c
index 74d1f10945..5997f651e1 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -43,14 +43,10 @@
 # include "libavfilter/avfiltergraph.h"
 #endif
 
-#include "cmdutils.h"
-
 #include <SDL.h>
 #include <SDL_thread.h>
 
-#ifdef __MINGW32__
-#undef main /* We don't want SDL to override our main() */
-#endif
+#include "cmdutils.h"
 
 #include <unistd.h>
 #include <assert.h>

From 83a8b3002f3399c52b3900ca2f068d51e97713ec Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 14:24:58 -0700
Subject: [PATCH 555/830] mpegvideoenc: fix multislice fate tests with
 threading disabled.

The MPEG encoding code assumes that n_threads == n_slices, and thus it
should use n_slices even if threading itself is disabled.
---
 libavcodec/mpegvideo.c     | 6 +++---
 libavcodec/mpegvideo_enc.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index dfb2289201..6a45da8761 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -586,7 +586,7 @@ av_cold int MPV_common_init(MpegEncContext *s)
         return -1;
     }
 
-    if((s->avctx->active_thread_type & FF_THREAD_SLICE) &&
+    if((s->encoding || (s->avctx->active_thread_type & FF_THREAD_SLICE)) &&
        (s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height))){
         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
         return -1;
@@ -750,7 +750,7 @@ av_cold int MPV_common_init(MpegEncContext *s)
     s->thread_context[0]= s;
 
     if (s->width && s->height) {
-    if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE) {
+    if (s->encoding || (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE)) {
         threads = s->avctx->thread_count;
 
         for(i=1; i<threads; i++){
@@ -782,7 +782,7 @@ void MPV_common_end(MpegEncContext *s)
 {
     int i, j, k;
 
-    if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE) {
+    if (s->encoding || (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE)) {
         for(i=0; i<s->avctx->thread_count; i++){
             free_duplicate_context(s->thread_context[i]);
         }
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 237ea64790..b5a1872eb1 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -1226,7 +1226,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
 {
     MpegEncContext *s = avctx->priv_data;
     AVFrame *pic_arg = data;
-    int i, stuffing_count, context_count = avctx->active_thread_type&FF_THREAD_SLICE ? avctx->thread_count : 1;
+    int i, stuffing_count, context_count = avctx->thread_count;
 
     for(i=0; i<context_count; i++){
         int start_y= s->thread_context[i]->start_mb_y;
@@ -2759,7 +2759,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
 {
     int i;
     int bits;
-    int context_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;
+    int context_count = s->avctx->thread_count;
 
     s->picture_number = picture_number;
 

From 0e5d31b16b39f0b4d2e3307714af592dd65cf123 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 20:17:20 -0700
Subject: [PATCH 556/830] swscale: split out unscaled altivec YUV converters in
 their own file.

---
 libswscale/Makefile                       |   3 +-
 libswscale/ppc/swscale_altivec_template.c | 155 ------------------
 libswscale/ppc/yuv2yuv_altivec.c          | 191 ++++++++++++++++++++++
 libswscale/swscale.c                      |  20 +--
 libswscale/swscale_internal.h             |   2 +
 5 files changed, 200 insertions(+), 171 deletions(-)
 create mode 100644 libswscale/ppc/yuv2yuv_altivec.c

diff --git a/libswscale/Makefile b/libswscale/Makefile
index 7f8f721e45..8ecce99195 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -11,7 +11,8 @@ OBJS-$(ARCH_BFIN)          +=  bfin/internal_bfin.o     \
                                bfin/swscale_bfin.o      \
                                bfin/yuv2rgb_bfin.o
 OBJS-$(CONFIG_MLIB)        +=  mlib/yuv2rgb_mlib.o
-OBJS-$(HAVE_ALTIVEC)       +=  ppc/yuv2rgb_altivec.o
+OBJS-$(HAVE_ALTIVEC)       +=  ppc/yuv2rgb_altivec.o    \
+                               ppc/yuv2yuv_altivec.o
 OBJS-$(HAVE_MMX)           +=  x86/rgb2rgb.o            \
                                x86/yuv2rgb_mmx.o
 OBJS-$(HAVE_VIS)           +=  sparc/yuv2rgb_vis.o
diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec_template.c
index 21e3b4eafb..5a821d0abf 100644
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec_template.c
@@ -390,158 +390,3 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW,
     }
     }
 }
-
-static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                                              int srcSliceH, uint8_t* dstParam[], int dstStride_a[])
-{
-    uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
-    // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
-    const uint8_t *ysrc = src[0];
-    const uint8_t *usrc = src[1];
-    const uint8_t *vsrc = src[2];
-    const int width = c->srcW;
-    const int height = srcSliceH;
-    const int lumStride = srcStride[0];
-    const int chromStride = srcStride[1];
-    const int dstStride = dstStride_a[0];
-    const vector unsigned char yperm = vec_lvsl(0, ysrc);
-    const int vertLumPerChroma = 2;
-    register unsigned int y;
-
-    if (width&15) {
-        yv12toyuy2(ysrc, usrc, vsrc, dst, c->srcW, srcSliceH, lumStride, chromStride, dstStride);
-        return srcSliceH;
-    }
-
-    /* This code assumes:
-
-    1) dst is 16 bytes-aligned
-    2) dstStride is a multiple of 16
-    3) width is a multiple of 16
-    4) lum & chrom stride are multiples of 8
-    */
-
-    for (y=0; y<height; y++) {
-        int i;
-        for (i = 0; i < width - 31; i+= 32) {
-            const unsigned int j = i >> 1;
-            vector unsigned char v_yA = vec_ld(i, ysrc);
-            vector unsigned char v_yB = vec_ld(i + 16, ysrc);
-            vector unsigned char v_yC = vec_ld(i + 32, ysrc);
-            vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
-            vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
-            vector unsigned char v_uA = vec_ld(j, usrc);
-            vector unsigned char v_uB = vec_ld(j + 16, usrc);
-            vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
-            vector unsigned char v_vA = vec_ld(j, vsrc);
-            vector unsigned char v_vB = vec_ld(j + 16, vsrc);
-            vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
-            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
-            vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
-            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
-            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
-            vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
-            vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
-            vec_st(v_yuy2_0, (i << 1), dst);
-            vec_st(v_yuy2_1, (i << 1) + 16, dst);
-            vec_st(v_yuy2_2, (i << 1) + 32, dst);
-            vec_st(v_yuy2_3, (i << 1) + 48, dst);
-        }
-        if (i < width) {
-            const unsigned int j = i >> 1;
-            vector unsigned char v_y1 = vec_ld(i, ysrc);
-            vector unsigned char v_u = vec_ld(j, usrc);
-            vector unsigned char v_v = vec_ld(j, vsrc);
-            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
-            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
-            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
-            vec_st(v_yuy2_0, (i << 1), dst);
-            vec_st(v_yuy2_1, (i << 1) + 16, dst);
-        }
-        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst += dstStride;
-    }
-
-    return srcSliceH;
-}
-
-static inline int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                                              int srcSliceH, uint8_t* dstParam[], int dstStride_a[])
-{
-    uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
-    // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
-    const uint8_t *ysrc = src[0];
-    const uint8_t *usrc = src[1];
-    const uint8_t *vsrc = src[2];
-    const int width = c->srcW;
-    const int height = srcSliceH;
-    const int lumStride = srcStride[0];
-    const int chromStride = srcStride[1];
-    const int dstStride = dstStride_a[0];
-    const int vertLumPerChroma = 2;
-    const vector unsigned char yperm = vec_lvsl(0, ysrc);
-    register unsigned int y;
-
-    if (width&15) {
-        yv12touyvy(ysrc, usrc, vsrc, dst, c->srcW, srcSliceH, lumStride, chromStride, dstStride);
-        return srcSliceH;
-    }
-
-    /* This code assumes:
-
-    1) dst is 16 bytes-aligned
-    2) dstStride is a multiple of 16
-    3) width is a multiple of 16
-    4) lum & chrom stride are multiples of 8
-    */
-
-    for (y=0; y<height; y++) {
-        int i;
-        for (i = 0; i < width - 31; i+= 32) {
-            const unsigned int j = i >> 1;
-            vector unsigned char v_yA = vec_ld(i, ysrc);
-            vector unsigned char v_yB = vec_ld(i + 16, ysrc);
-            vector unsigned char v_yC = vec_ld(i + 32, ysrc);
-            vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
-            vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
-            vector unsigned char v_uA = vec_ld(j, usrc);
-            vector unsigned char v_uB = vec_ld(j + 16, usrc);
-            vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
-            vector unsigned char v_vA = vec_ld(j, vsrc);
-            vector unsigned char v_vB = vec_ld(j + 16, vsrc);
-            vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
-            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
-            vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
-            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
-            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
-            vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
-            vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
-            vec_st(v_uyvy_0, (i << 1), dst);
-            vec_st(v_uyvy_1, (i << 1) + 16, dst);
-            vec_st(v_uyvy_2, (i << 1) + 32, dst);
-            vec_st(v_uyvy_3, (i << 1) + 48, dst);
-        }
-        if (i < width) {
-            const unsigned int j = i >> 1;
-            vector unsigned char v_y1 = vec_ld(i, ysrc);
-            vector unsigned char v_u = vec_ld(j, usrc);
-            vector unsigned char v_v = vec_ld(j, vsrc);
-            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
-            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
-            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
-            vec_st(v_uyvy_0, (i << 1), dst);
-            vec_st(v_uyvy_1, (i << 1) + 16, dst);
-        }
-        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst += dstStride;
-    }
-    return srcSliceH;
-}
diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c
new file mode 100644
index 0000000000..4cd02ffe1d
--- /dev/null
+++ b/libswscale/ppc/yuv2yuv_altivec.c
@@ -0,0 +1,191 @@
+/*
+ * AltiVec-enhanced yuv-to-yuv convertion routines.
+ *
+ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
+ * based on the equivalent C code in swscale.c
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include "config.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+#include "libavutil/cpu.h"
+
+static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t* src[],
+                                       int srcStride[], int srcSliceY,
+                                       int srcSliceH, uint8_t* dstParam[],
+                                       int dstStride_a[])
+{
+    uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
+    // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
+    const uint8_t *ysrc = src[0];
+    const uint8_t *usrc = src[1];
+    const uint8_t *vsrc = src[2];
+    const int width = c->srcW;
+    const int height = srcSliceH;
+    const int lumStride = srcStride[0];
+    const int chromStride = srcStride[1];
+    const int dstStride = dstStride_a[0];
+    const vector unsigned char yperm = vec_lvsl(0, ysrc);
+    const int vertLumPerChroma = 2;
+    register unsigned int y;
+
+    /* This code assumes:
+
+    1) dst is 16 bytes-aligned
+    2) dstStride is a multiple of 16
+    3) width is a multiple of 16
+    4) lum & chrom stride are multiples of 8
+    */
+
+    for (y=0; y<height; y++) {
+        int i;
+        for (i = 0; i < width - 31; i+= 32) {
+            const unsigned int j = i >> 1;
+            vector unsigned char v_yA = vec_ld(i, ysrc);
+            vector unsigned char v_yB = vec_ld(i + 16, ysrc);
+            vector unsigned char v_yC = vec_ld(i + 32, ysrc);
+            vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
+            vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
+            vector unsigned char v_uA = vec_ld(j, usrc);
+            vector unsigned char v_uB = vec_ld(j + 16, usrc);
+            vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
+            vector unsigned char v_vA = vec_ld(j, vsrc);
+            vector unsigned char v_vB = vec_ld(j + 16, vsrc);
+            vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
+            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+            vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
+            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
+            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
+            vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
+            vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
+            vec_st(v_yuy2_0, (i << 1), dst);
+            vec_st(v_yuy2_1, (i << 1) + 16, dst);
+            vec_st(v_yuy2_2, (i << 1) + 32, dst);
+            vec_st(v_yuy2_3, (i << 1) + 48, dst);
+        }
+        if (i < width) {
+            const unsigned int j = i >> 1;
+            vector unsigned char v_y1 = vec_ld(i, ysrc);
+            vector unsigned char v_u = vec_ld(j, usrc);
+            vector unsigned char v_v = vec_ld(j, vsrc);
+            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
+            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
+            vec_st(v_yuy2_0, (i << 1), dst);
+            vec_st(v_yuy2_1, (i << 1) + 16, dst);
+        }
+        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
+            usrc += chromStride;
+            vsrc += chromStride;
+        }
+        ysrc += lumStride;
+        dst += dstStride;
+    }
+
+    return srcSliceH;
+}
+
+static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t* src[],
+                                       int srcStride[], int srcSliceY,
+                                       int srcSliceH, uint8_t* dstParam[],
+                                       int dstStride_a[])
+{
+    uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
+    // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
+    const uint8_t *ysrc = src[0];
+    const uint8_t *usrc = src[1];
+    const uint8_t *vsrc = src[2];
+    const int width = c->srcW;
+    const int height = srcSliceH;
+    const int lumStride = srcStride[0];
+    const int chromStride = srcStride[1];
+    const int dstStride = dstStride_a[0];
+    const int vertLumPerChroma = 2;
+    const vector unsigned char yperm = vec_lvsl(0, ysrc);
+    register unsigned int y;
+
+    /* This code assumes:
+
+    1) dst is 16 bytes-aligned
+    2) dstStride is a multiple of 16
+    3) width is a multiple of 16
+    4) lum & chrom stride are multiples of 8
+    */
+
+    for (y=0; y<height; y++) {
+        int i;
+        for (i = 0; i < width - 31; i+= 32) {
+            const unsigned int j = i >> 1;
+            vector unsigned char v_yA = vec_ld(i, ysrc);
+            vector unsigned char v_yB = vec_ld(i + 16, ysrc);
+            vector unsigned char v_yC = vec_ld(i + 32, ysrc);
+            vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
+            vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
+            vector unsigned char v_uA = vec_ld(j, usrc);
+            vector unsigned char v_uB = vec_ld(j + 16, usrc);
+            vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
+            vector unsigned char v_vA = vec_ld(j, vsrc);
+            vector unsigned char v_vB = vec_ld(j + 16, vsrc);
+            vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
+            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+            vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
+            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
+            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
+            vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
+            vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
+            vec_st(v_uyvy_0, (i << 1), dst);
+            vec_st(v_uyvy_1, (i << 1) + 16, dst);
+            vec_st(v_uyvy_2, (i << 1) + 32, dst);
+            vec_st(v_uyvy_3, (i << 1) + 48, dst);
+        }
+        if (i < width) {
+            const unsigned int j = i >> 1;
+            vector unsigned char v_y1 = vec_ld(i, ysrc);
+            vector unsigned char v_u = vec_ld(j, usrc);
+            vector unsigned char v_v = vec_ld(j, vsrc);
+            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
+            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
+            vec_st(v_uyvy_0, (i << 1), dst);
+            vec_st(v_uyvy_1, (i << 1) + 16, dst);
+        }
+        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
+            usrc += chromStride;
+            vsrc += chromStride;
+        }
+        ysrc += lumStride;
+        dst += dstStride;
+    }
+    return srcSliceH;
+}
+
+void ff_swscale_get_unscaled_altivec(SwsContext *c)
+{
+    if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) && !(c->srcW & 15) &&
+        !(c->flags & SWS_BITEXACT) && c->srcFormat == PIX_FMT_YUV420P) {
+        enum PixelFormat dstFormat = c->dstFormat;
+
+        // unscaled YV12 -> packed YUV, we want speed
+        if (dstFormat == PIX_FMT_YUYV422)
+            c->swScale= yv12toyuy2_unscaled_altivec;
+        else if (dstFormat == PIX_FMT_UYVY422)
+            c->swScale= yv12touyvy_unscaled_altivec;
+    }
+}
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index d6bdfdd3cc..5327948d3c 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1857,18 +1857,6 @@ void ff_get_unscaled_swscale(SwsContext *c)
     if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
         c->swScale= uyvyToYuv422Wrapper;
 
-#if HAVE_ALTIVEC
-    if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) &&
-        !(c->flags & SWS_BITEXACT) &&
-        srcFormat == PIX_FMT_YUV420P) {
-        // unscaled YV12 -> packed YUV, we want speed
-        if (dstFormat == PIX_FMT_YUYV422)
-            c->swScale= yv12toyuy2_unscaled_altivec;
-        else if (dstFormat == PIX_FMT_UYVY422)
-            c->swScale= yv12touyvy_unscaled_altivec;
-    }
-#endif
-
     /* simple copy */
     if (  srcFormat == dstFormat
         || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
@@ -1887,9 +1875,11 @@ void ff_get_unscaled_swscale(SwsContext *c)
         else /* Planar YUV or gray */
             c->swScale= planarCopyWrapper;
     }
-#if ARCH_BFIN
-    ff_bfin_get_unscaled_swscale (c);
-#endif
+
+    if (ARCH_BFIN)
+        ff_bfin_get_unscaled_swscale(c);
+    if (HAVE_ALTIVEC)
+        ff_swscale_get_unscaled_altivec(c);
 }
 
 static void reset_ptr(const uint8_t* src[], int format)
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index c17d55023d..09d122418f 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -479,6 +479,8 @@ extern const AVClass sws_context_class;
  */
 void ff_get_unscaled_swscale(SwsContext *c);
 
+void ff_swscale_get_unscaled_altivec(SwsContext *c);
+
 /**
  * Returns function pointer to fastest main scaler path function depending
  * on architecture and available optimizations.

From a3e9bb5deed0f43c84ee3006962f7bd27ae4f811 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 19:28:56 -0700
Subject: [PATCH 557/830] swscale: remove indirections in
 ppc/swscale_template.c.

---
 libswscale/ppc/swscale_altivec_template.c | 25 +++++++--
 libswscale/ppc/swscale_template.c         | 66 -----------------------
 libswscale/ppc/yuv2rgb_altivec.c          |  3 +-
 libswscale/swscale.c                      |  2 +-
 4 files changed, 24 insertions(+), 72 deletions(-)
 delete mode 100644 libswscale/ppc/swscale_template.c

diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec_template.c
index 5a821d0abf..3c31c3e130 100644
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec_template.c
@@ -85,12 +85,15 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
     }
 }
 
-static inline void
-yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc,
+static void
+yuv2yuvX_altivec_real(SwsContext *c,
+                      const int16_t *lumFilter, const int16_t **lumSrc,
                       int lumFilterSize, const int16_t *chrFilter,
                       const int16_t **chrUSrc, const int16_t **chrVSrc,
-                      int chrFilterSize, uint8_t *dest, uint8_t *uDest,
-                      uint8_t *vDest, int dstW, int chrDstW)
+                      int chrFilterSize, const int16_t **alpSrc,
+                      uint8_t *dest, uint8_t *uDest,
+                      uint8_t *vDest, uint8_t *aDest,
+                      int dstW, int chrDstW)
 {
     const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
     register int i, j;
@@ -390,3 +393,17 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW,
     }
     }
 }
+
+static void RENAME(sws_init_swScale)(SwsContext *c)
+{
+    c->yuv2yuvX     = yuv2yuvX_altivec_real;
+
+    /* The following list of supported dstFormat values should
+     * match what's found in the body of ff_yuv2packedX_altivec() */
+    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
+        (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
+         c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
+         c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB)) {
+            c->yuv2packedX  = ff_yuv2packedX_altivec;
+        } /* in every other case c->yuv2packedX is left as it was on entry */
+}
diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
deleted file mode 100644
index 01afe43bb2..0000000000
--- a/libswscale/ppc/swscale_template.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-#include "swscale_altivec_template.c"
-
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
-                                    const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrUSrc,
-                                    const int16_t **chrVSrc, int chrFilterSize,
-                                    const int16_t **alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, int dstW, int chrDstW)
-{
-    yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
-                          chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                          dest, uDest, vDest, dstW, chrDstW);
-}
-
-/**
- * vertical scale YV12 to RGB
- */
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
-                                       const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrUSrc,
-                                       const int16_t **chrVSrc, int chrFilterSize,
-                                       const int16_t **alpSrc, uint8_t *dest,
-                                       int dstW, int dstY)
-{
-    /* The following list of supported dstFormat values should
-       match what's found in the body of ff_yuv2packedX_altivec() */
-    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
-         (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
-          c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
-          c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
-            ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
-                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                                   dest, dstW, dstY);
-    else
-        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                       chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                       alpSrc, dest, dstW, dstY);
-}
-
-static void RENAME(sws_init_swScale)(SwsContext *c)
-{
-    c->yuv2yuvX     = RENAME(yuv2yuvX    );
-    c->yuv2packedX  = RENAME(yuv2packedX );
-}
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 54ebee1edf..4b2bdafca0 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -630,7 +630,8 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
                        const int16_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int16_t **chrUSrc,
                        const int16_t **chrVSrc, int chrFilterSize,
-                       uint8_t *dest, int dstW, int dstY)
+                       const int16_t **alpSrc, uint8_t *dest,
+                       int dstW, int dstY)
 {
     int i,j;
     vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 5327948d3c..f15495de36 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1192,7 +1192,7 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, int width, uint
 #if HAVE_ALTIVEC
 #undef RENAME
 #define RENAME(a) a ## _altivec
-#include "ppc/swscale_template.c"
+#include "ppc/swscale_altivec_template.c"
 #endif
 
 //MMX versions

From 67d80a54217f93a50b7a52449fad12215b43c9e8 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 20:04:04 -0700
Subject: [PATCH 558/830] swscale: split out ppc _template.c files from main
 swscale.c.

---
 libswscale/Makefile                           |  3 +-
 ...e_altivec_template.c => swscale_altivec.c} | 20 ++++++++---
 libswscale/ppc/yuv2rgb_altivec.c              |  1 +
 libswscale/ppc/yuv2rgb_altivec.h              | 34 +++++++++++++++++++
 libswscale/swscale.c                          | 12 ++-----
 libswscale/swscale_internal.h                 |  7 ++--
 6 files changed, 56 insertions(+), 21 deletions(-)
 rename libswscale/ppc/{swscale_altivec_template.c => swscale_altivec.c} (96%)
 create mode 100644 libswscale/ppc/yuv2rgb_altivec.h

diff --git a/libswscale/Makefile b/libswscale/Makefile
index 8ecce99195..7a0d129933 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -11,7 +11,8 @@ OBJS-$(ARCH_BFIN)          +=  bfin/internal_bfin.o     \
                                bfin/swscale_bfin.o      \
                                bfin/yuv2rgb_bfin.o
 OBJS-$(CONFIG_MLIB)        +=  mlib/yuv2rgb_mlib.o
-OBJS-$(HAVE_ALTIVEC)       +=  ppc/yuv2rgb_altivec.o    \
+OBJS-$(HAVE_ALTIVEC)       +=  ppc/swscale_altivec.o    \
+                               ppc/yuv2rgb_altivec.o    \
                                ppc/yuv2yuv_altivec.o
 OBJS-$(HAVE_MMX)           +=  x86/rgb2rgb.o            \
                                x86/yuv2rgb_mmx.o
diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec.c
similarity index 96%
rename from libswscale/ppc/swscale_altivec_template.c
rename to libswscale/ppc/swscale_altivec.c
index 3c31c3e130..acfdc94cd8 100644
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -21,6 +21,13 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <inttypes.h>
+#include "config.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+#include "libavutil/cpu.h"
+#include "yuv2rgb_altivec.h"
+
 #define vzero vec_splat_s32(0)
 
 static inline void
@@ -214,10 +221,10 @@ yuv2yuvX_altivec_real(SwsContext *c,
     }
 }
 
-static inline void hScale_altivec_real(int16_t *dst, int dstW,
-                                       const uint8_t *src, int srcW,
-                                       int xInc, const int16_t *filter,
-                                       const int16_t *filterPos, int filterSize)
+static void hScale_altivec_real(int16_t *dst, int dstW,
+                                const uint8_t *src, int srcW,
+                                int xInc, const int16_t *filter,
+                                const int16_t *filterPos, int filterSize)
 {
     register int i;
     DECLARE_ALIGNED(16, int, tempo)[4];
@@ -394,8 +401,11 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW,
     }
 }
 
-static void RENAME(sws_init_swScale)(SwsContext *c)
+void ff_sws_init_swScale_altivec(SwsContext *c)
 {
+    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
+        return;
+
     c->yuv2yuvX     = yuv2yuvX_altivec_real;
 
     /* The following list of supported dstFormat values should
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 4b2bdafca0..476db22489 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -95,6 +95,7 @@ adjustment.
 #include "libswscale/swscale.h"
 #include "libswscale/swscale_internal.h"
 #include "libavutil/cpu.h"
+#include "yuv2rgb_altivec.h"
 
 #undef PROFILE_THE_BEAST
 #undef INC_SCALING
diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h
new file mode 100644
index 0000000000..b54a856905
--- /dev/null
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@@ -0,0 +1,34 @@
+/*
+ * Prototype of the AltiVec-enhanced yuv2packedX (ff_yuv2packedX_altivec)
+ *
+ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
+ * based on the equivalent C code in swscale.c
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef PPC_YUV2RGB_ALTIVEC_H
+#define PPC_YUV2RGB_ALTIVEC_H 1
+
+void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
+                            const int16_t **lumSrc, int lumFilterSize,
+                            const int16_t *chrFilter, const int16_t **chrUSrc,
+                            const int16_t **chrVSrc, int chrFilterSize,
+                            const int16_t **alpSrc, uint8_t *dest,
+                            int dstW, int dstY);
+
+#endif /* PPC_YUV2RGB_ALTIVEC_H */
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index f15495de36..1fc3155ae5 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1189,12 +1189,6 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, int width, uint
 
 #include "swscale_template.c"
 
-#if HAVE_ALTIVEC
-#undef RENAME
-#define RENAME(a) a ## _altivec
-#include "ppc/swscale_altivec_template.c"
-#endif
-
 //MMX versions
 #if HAVE_MMX
 #undef RENAME
@@ -1227,10 +1221,8 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
     if (cpu_flags & AV_CPU_FLAG_MMX2)
         sws_init_swScale_MMX2(c);
 #endif
-#if HAVE_ALTIVEC
-    if (cpu_flags & AV_CPU_FLAG_ALTIVEC)
-        sws_init_swScale_altivec(c);
-#endif
+    if (HAVE_ALTIVEC)
+        ff_sws_init_swScale_altivec(c);
 
     return swScale_c;
 }
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 09d122418f..15643ae337 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -335,11 +335,6 @@ SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c);
 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c);
 SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c);
 void ff_bfin_get_unscaled_swscale(SwsContext *c);
-void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
-                            const int16_t **lumSrc, int lumFilterSize,
-                            const int16_t *chrFilter, const int16_t **chrUSrc,
-                            const int16_t **chrVSrc, int chrFilterSize,
-                            uint8_t *dest, int dstW, int dstY);
 
 const char *sws_format_name(enum PixelFormat format);
 
@@ -487,4 +482,6 @@ void ff_swscale_get_unscaled_altivec(SwsContext *c);
  */
 SwsFunc ff_getSwsFunc(SwsContext *c);
 
+void ff_sws_init_swScale_altivec(SwsContext *c);
+
 #endif /* SWSCALE_SWSCALE_INTERNAL_H */

From 075d0ae72c993403bdeb8713f740d1bbb7a1359d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 20:33:05 -0700
Subject: [PATCH 559/830] swscale: enable hScale_altivec_real.

---
 libswscale/ppc/swscale_altivec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index acfdc94cd8..7f4dfcd6f1 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -406,6 +406,7 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
     if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
         return;
 
+    c->hScale       = hScale_altivec_real;
     c->yuv2yuvX     = yuv2yuvX_altivec_real;
 
     /* The following list of supported dstFormat values should

From 983260b0a473f85e3b67a6c64499e409aa5eb67b Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 22:00:00 -0700
Subject: [PATCH 560/830] swscale: split out x86/swscale_template.c from
 swscale.c.

---
 libswscale/Makefile               |   1 +
 libswscale/swscale.c              |  39 +------
 libswscale/swscale_internal.h     |   4 +
 libswscale/swscale_template.c     |   5 -
 libswscale/x86/swscale_mmx.c      | 187 ++++++++++++++++++++++++++++++
 libswscale/x86/swscale_template.c |  82 -------------
 libswscale/x86/swscale_template.h |  79 -------------
 7 files changed, 194 insertions(+), 203 deletions(-)
 create mode 100644 libswscale/x86/swscale_mmx.c
 delete mode 100644 libswscale/x86/swscale_template.h

diff --git a/libswscale/Makefile b/libswscale/Makefile
index 7a0d129933..1d62b13af4 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -15,6 +15,7 @@ OBJS-$(HAVE_ALTIVEC)       +=  ppc/swscale_altivec.o    \
                                ppc/yuv2rgb_altivec.o    \
                                ppc/yuv2yuv_altivec.o
 OBJS-$(HAVE_MMX)           +=  x86/rgb2rgb.o            \
+                               x86/swscale_mmx.o        \
                                x86/yuv2rgb_mmx.o
 OBJS-$(HAVE_VIS)           +=  sparc/yuv2rgb_vis.o
 
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 1fc3155ae5..c16751f420 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -61,16 +61,12 @@ untested special converters
 #include "swscale_internal.h"
 #include "rgb2rgb.h"
 #include "libavutil/intreadwrite.h"
-#include "libavutil/x86_cpu.h"
 #include "libavutil/cpu.h"
 #include "libavutil/avutil.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/bswap.h"
 #include "libavutil/pixdesc.h"
 
-#undef MOVNTQ
-#undef PAVGB
-
 #define DITHER1XBPP
 
 #define isPacked(x)         (       \
@@ -1182,45 +1178,14 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, int width, uint
     }
 }
 
-//Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
-//Plain C versions
-
-#define COMPILE_TEMPLATE_MMX2 0
-
 #include "swscale_template.c"
 
-//MMX versions
-#if HAVE_MMX
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#define COMPILE_TEMPLATE_MMX2 0
-#define RENAME(a) a ## _MMX
-#include "x86/swscale_template.c"
-#endif
-
-//MMX2 versions
-#if HAVE_MMX2
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#define COMPILE_TEMPLATE_MMX2 1
-#define RENAME(a) a ## _MMX2
-#include "x86/swscale_template.c"
-#endif
-
 SwsFunc ff_getSwsFunc(SwsContext *c)
 {
-    int cpu_flags = av_get_cpu_flags();
-
     sws_init_swScale_c(c);
 
-#if HAVE_MMX
-    if (cpu_flags & AV_CPU_FLAG_MMX)
-        sws_init_swScale_MMX(c);
-#endif
-#if HAVE_MMX2
-    if (cpu_flags & AV_CPU_FLAG_MMX2)
-        sws_init_swScale_MMX2(c);
-#endif
+    if (HAVE_MMX)
+        ff_sws_init_swScale_mmx(c);
     if (HAVE_ALTIVEC)
         ff_sws_init_swScale_altivec(c);
 
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 15643ae337..678d6d5797 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -329,6 +329,9 @@ int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
 
 void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4],
                                     int brightness, int contrast, int saturation);
+void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
+                           int lastInLumBuf, int lastInChrBuf);
+
 SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c);
 SwsFunc ff_yuv2rgb_init_vis(SwsContext *c);
 SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c);
@@ -483,5 +486,6 @@ void ff_swscale_get_unscaled_altivec(SwsContext *c);
 SwsFunc ff_getSwsFunc(SwsContext *c);
 
 void ff_sws_init_swScale_altivec(SwsContext *c);
+void ff_sws_init_swScale_mmx(SwsContext *c);
 
 #endif /* SWSCALE_SWSCALE_INTERNAL_H */
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index d05b9a1929..383f01888f 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -467,11 +467,6 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int
 #define DEBUG_SWSCALE_BUFFERS 0
 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
 
-#if HAVE_MMX
-static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
-                                  int lastInLumBuf, int lastInChrBuf);
-#endif
-
 static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                      int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
 {
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c
new file mode 100644
index 0000000000..c86f75df51
--- /dev/null
+++ b/libswscale/x86/swscale_mmx.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include "config.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
+#include "libavutil/pixdesc.h"
+
+DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
+DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
+DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
+DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
+DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
+DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
+DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
+DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
+    0x0103010301030103LL,
+    0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
+    0x0602060206020602LL,
+    0x0004000400040004LL,};
+
+DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
+DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
+DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
+DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
+DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
+DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
+
+DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
+DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
+DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
+
+#ifdef FAST_BGR2YV12
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
+#else
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
+#endif /* FAST_BGR2YV12 */
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
+
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
+
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
+    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
+    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
+};
+
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
+
+//MMX versions
+#if HAVE_MMX
+#undef RENAME
+#define COMPILE_TEMPLATE_MMX2 0
+#define RENAME(a) a ## _MMX
+#include "swscale_template.c"
+#endif
+
+//MMX2 versions
+#if HAVE_MMX2
+#undef RENAME
+#undef COMPILE_TEMPLATE_MMX2
+#define COMPILE_TEMPLATE_MMX2 1
+#define RENAME(a) a ## _MMX2
+#include "swscale_template.c"
+#endif
+
+void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
+                           int lastInLumBuf, int lastInChrBuf)
+{
+    const int dstH= c->dstH;
+    const int flags= c->flags;
+    int16_t **lumPixBuf= c->lumPixBuf;
+    int16_t **chrUPixBuf= c->chrUPixBuf;
+    int16_t **alpPixBuf= c->alpPixBuf;
+    const int vLumBufSize= c->vLumBufSize;
+    const int vChrBufSize= c->vChrBufSize;
+    int16_t *vLumFilterPos= c->vLumFilterPos;
+    int16_t *vChrFilterPos= c->vChrFilterPos;
+    int16_t *vLumFilter= c->vLumFilter;
+    int16_t *vChrFilter= c->vChrFilter;
+    int32_t *lumMmxFilter= c->lumMmxFilter;
+    int32_t *chrMmxFilter= c->chrMmxFilter;
+    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
+    const int vLumFilterSize= c->vLumFilterSize;
+    const int vChrFilterSize= c->vChrFilterSize;
+    const int chrDstY= dstY>>c->chrDstVSubSample;
+    const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
+    const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
+    /* Pick the dither words for row dstY, then refill the lum/chr/alp MmxFilter tables with line pointers and coefficients. */
+    c->blueDither= ff_dither8[dstY&1];
+    if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
+        c->greenDither= ff_dither8[dstY&1];
+    else
+        c->greenDither= ff_dither4[dstY&1];
+    c->redDither= ff_dither8[(dstY+1)&1];
+    if (dstY < dstH - 2) { // the tables are left untouched for the last two output rows
+        const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+        const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+        const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+        int i;
+        if (flags & SWS_ACCURATE_RND) { // each table entry covers two source lines (i advances by 2)
+            int s= APCK_SIZE / 8;
+            for (i=0; i<vLumFilterSize; i+=2) {
+                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
+                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
+                lumMmxFilter[s*i+APCK_COEF/4  ]=
+                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
+                + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0); // multiply, not <<: the tap may be negative
+                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
+                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
+                    alpMmxFilter[s*i+APCK_COEF/4  ]=
+                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
+                }
+            }
+            for (i=0; i<vChrFilterSize; i+=2) {
+                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i  ];
+                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
+                chrMmxFilter[s*i+APCK_COEF/4  ]=
+                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
+                + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0); // multiply, not <<: the tap may be negative
+            }
+        } else { // one source line per table entry
+            for (i=0; i<vLumFilterSize; i++) {
+                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
+                lumMmxFilter[4*i+2]=
+                lumMmxFilter[4*i+3]=
+                ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U; // unsigned: the product can exceed INT_MAX
+                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
+                    alpMmxFilter[4*i+2]=
+                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
+                }
+            }
+            for (i=0; i<vChrFilterSize; i++) {
+                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
+                chrMmxFilter[4*i+2]=
+                chrMmxFilter[4*i+3]=
+                ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U; // unsigned: the product can exceed INT_MAX
+            }
+        }
+    }
+}
+
+void ff_sws_init_swScale_mmx(SwsContext *c)
+{
+    /* Run the init routine of every x86 template variant the running CPU reports. */
+    int cpu_flags = av_get_cpu_flags();
+    if (cpu_flags & AV_CPU_FLAG_MMX) sws_init_swScale_MMX(c);
+#if HAVE_MMX2 /* sws_init_swScale_MMX2() is only instantiated under HAVE_MMX2 */
+    if (cpu_flags & AV_CPU_FLAG_MMX2) sws_init_swScale_MMX2(c);
+#endif
+}
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 4ac59d5fb3..efaadaa09e 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -18,8 +18,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "swscale_template.h"
-
 #undef REAL_MOVNTQ
 #undef MOVNTQ
 #undef PREFETCH
@@ -2185,86 +2183,6 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d
 }
 #endif /* COMPILE_TEMPLATE_MMX2 */
 
-#if !COMPILE_TEMPLATE_MMX2
-static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
-                                  int lastInLumBuf, int lastInChrBuf)
-{
-    const int dstH= c->dstH;
-    const int flags= c->flags;
-    int16_t **lumPixBuf= c->lumPixBuf;
-    int16_t **chrUPixBuf= c->chrUPixBuf;
-    int16_t **alpPixBuf= c->alpPixBuf;
-    const int vLumBufSize= c->vLumBufSize;
-    const int vChrBufSize= c->vChrBufSize;
-    int16_t *vLumFilterPos= c->vLumFilterPos;
-    int16_t *vChrFilterPos= c->vChrFilterPos;
-    int16_t *vLumFilter= c->vLumFilter;
-    int16_t *vChrFilter= c->vChrFilter;
-    int32_t *lumMmxFilter= c->lumMmxFilter;
-    int32_t *chrMmxFilter= c->chrMmxFilter;
-    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
-    const int vLumFilterSize= c->vLumFilterSize;
-    const int vChrFilterSize= c->vChrFilterSize;
-    const int chrDstY= dstY>>c->chrDstVSubSample;
-    const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
-    const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
-
-    c->blueDither= ff_dither8[dstY&1];
-    if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
-        c->greenDither= ff_dither8[dstY&1];
-    else
-        c->greenDither= ff_dither4[dstY&1];
-    c->redDither= ff_dither8[(dstY+1)&1];
-    if (dstY < dstH - 2) {
-        const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-        const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-        const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-        int i;
-        if (flags & SWS_ACCURATE_RND) {
-            int s= APCK_SIZE / 8;
-            for (i=0; i<vLumFilterSize; i+=2) {
-                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
-                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
-                lumMmxFilter[s*i+APCK_COEF/4  ]=
-                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
-                           + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
-                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
-                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
-                    alpMmxFilter[s*i+APCK_COEF/4  ]=
-                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
-                }
-            }
-            for (i=0; i<vChrFilterSize; i+=2) {
-                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i  ];
-                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
-                chrMmxFilter[s*i+APCK_COEF/4  ]=
-                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
-                           + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
-            }
-        } else {
-            for (i=0; i<vLumFilterSize; i++) {
-                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
-                lumMmxFilter[4*i+2]=
-                lumMmxFilter[4*i+3]=
-                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
-                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
-                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
-                    alpMmxFilter[4*i+2]=
-                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
-                }
-            }
-            for (i=0; i<vChrFilterSize; i++) {
-                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
-                chrMmxFilter[4*i+2]=
-                chrMmxFilter[4*i+3]=
-                    ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
-            }
-        }
-    }
-}
-#endif /* !COMPILE_TEMPLATE_MMX2 */
-
 static void RENAME(sws_init_swScale)(SwsContext *c)
 {
     enum PixelFormat srcFormat = c->srcFormat;
diff --git a/libswscale/x86/swscale_template.h b/libswscale/x86/swscale_template.h
deleted file mode 100644
index 320e563bdb..0000000000
--- a/libswscale/x86/swscale_template.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-#ifndef SWSCALE_X86_SWSCALE_TEMPLATE_H
-#define SWSCALE_X86_SWSCALE_TEMPLATE_H
-
-DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
-DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
-DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
-DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
-DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
-DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
-DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
-DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-        0x0103010301030103LL,
-        0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-        0x0602060206020602LL,
-        0x0004000400040004LL,};
-
-DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
-DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
-DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
-DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
-DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
-DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
-
-DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
-
-#ifdef FAST_BGR2YV12
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
-#else
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
-#endif /* FAST_BGR2YV12 */
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
-    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
-    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
-};
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
-
-#endif /* SWSCALE_X86_SWSCALE_TEMPLATE_H */

From 1674bd2abe877b857f1be12b152e4ec496307963 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 22:34:12 -0700
Subject: [PATCH 561/830] swscale: integrate (literally) swscale_template.c in
 swscale.c.

---
 libswscale/swscale.c          | 931 ++++++++++++++++++++++++++++++++-
 libswscale/swscale_template.c | 950 ----------------------------------
 2 files changed, 930 insertions(+), 951 deletions(-)
 delete mode 100644 libswscale/swscale_template.c

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index c16751f420..43d0d69055 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1178,7 +1178,936 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, int width, uint
     }
 }
 
-#include "swscale_template.c"
+static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
+                              const int16_t **lumSrc, int lumFilterSize,
+                              const int16_t *chrFilter, const int16_t **chrUSrc,
+                              const int16_t **chrVSrc,
+                              int chrFilterSize, const int16_t **alpSrc,
+                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                              uint8_t *aDest, int dstW, int chrDstW)
+{
+    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
+                chrFilter, chrUSrc, chrVSrc, chrFilterSize,
+                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
+}
+
+static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
+                               const int16_t **lumSrc, int lumFilterSize,
+                               const int16_t *chrFilter, const int16_t **chrUSrc,
+                               const int16_t **chrVSrc,
+                               int chrFilterSize, uint8_t *dest, uint8_t *uDest,
+                               int dstW, int chrDstW, enum PixelFormat dstFormat)
+{
+    yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
+                 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
+                 dest, uDest, dstW, chrDstW, dstFormat);
+}
+
+static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
+                              const int16_t *chrUSrc, const int16_t *chrVSrc,
+                              const int16_t *alpSrc,
+                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                              uint8_t *aDest, int dstW, int chrDstW)
+{
+    int i;
+    for (i=0; i<dstW; i++) {
+        int val= (lumSrc[i]+64)>>7;
+        dest[i]= av_clip_uint8(val);
+    }
+
+    if (uDest)
+        for (i=0; i<chrDstW; i++) {
+            int u=(chrUSrc[i]+64)>>7;
+            int v=(chrVSrc[i]+64)>>7;
+            uDest[i]= av_clip_uint8(u);
+            vDest[i]= av_clip_uint8(v);
+        }
+
+    if (CONFIG_SWSCALE_ALPHA && aDest)
+        for (i=0; i<dstW; i++) {
+            int val= (alpSrc[i]+64)>>7;
+            aDest[i]= av_clip_uint8(val);
+        }
+}
+
+
+/**
+ * vertical scale YV12 to RGB
+ */
+static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
+                                 const int16_t **lumSrc, int lumFilterSize,
+                                 const int16_t *chrFilter, const int16_t **chrUSrc,
+                                 const int16_t **chrVSrc,
+                                 int chrFilterSize, const int16_t **alpSrc,
+                                 uint8_t *dest, int dstW, int dstY)
+{
+        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
+                       chrFilter, chrUSrc, chrVSrc, chrFilterSize,
+                       alpSrc, dest, dstW, dstY);
+}
+
+/**
+ * vertical bilinear scale YV12 to RGB
+ */
+static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *buf1, const uint16_t *ubuf0,
+                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                 const uint16_t *vbuf1, const uint16_t *abuf0,
+                                 const uint16_t *abuf1, uint8_t *dest, int dstW,
+                                 int yalpha, int uvalpha, int y)
+{
+    int  yalpha1=4095- yalpha;
+    int uvalpha1=4095-uvalpha;
+    int i;
+
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
+}
+
+/**
+ * YV12 to RGB without scaling or interpolating
+ */
+static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                 const uint16_t *abuf0, uint8_t *dest, int dstW,
+                                 int uvalpha, enum PixelFormat dstFormat,
+                                 int flags, int y)
+{
+    const int yalpha1=0;
+    int i;
+
+    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int yalpha= 4096; //FIXME ...
+
+    if (uvalpha < 2048) {
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+    } else {
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+    }
+}
+
+//FIXME yuy2* can read up to 7 samples too much
+
+static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
+                             uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++)
+        dst[i]= src[2*i];
+}
+
+static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                              const uint8_t *src2, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dstU[i]= src1[4*i + 1];
+        dstV[i]= src1[4*i + 3];
+    }
+    assert(src1 == src2);
+}
+
+static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                            const uint8_t *src2, int width, uint32_t *unused)
+{
+    int i;
+    // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
+    // we need to skip each second pixel. Same for BEToUV.
+    for (i=0; i<width; i++) {
+        dstU[i]= src1[2*i + 1];
+        dstV[i]= src2[2*i + 1];
+    }
+}
+
+/* This is almost identical to the previous, end exists only because
+ * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
+static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
+                             uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++)
+        dst[i]= src[2*i+1];
+}
+
+static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                              const uint8_t *src2, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dstU[i]= src1[4*i + 0];
+        dstV[i]= src1[4*i + 2];
+    }
+    assert(src1 == src2);
+}
+
+static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                            const uint8_t *src2, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dstU[i]= src1[2*i];
+        dstV[i]= src2[2*i];
+    }
+}
+
+static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
+                              const uint8_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+        dst1[i] = src[2*i+0];
+        dst2[i] = src[2*i+1];
+    }
+}
+
+static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
+                              const uint8_t *src1, const uint8_t *src2,
+                              int width, uint32_t *unused)
+{
+    nvXXtoUV_c(dstU, dstV, src1, width);
+}
+
+static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
+                              const uint8_t *src1, const uint8_t *src2,
+                              int width, uint32_t *unused)
+{
+    nvXXtoUV_c(dstV, dstU, src1, width);
+}
+
+// FIXME Maybe dither instead.
+#define YUV_NBPS(depth, endianness, rfunc) \
+static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+                                          const uint8_t *_srcU, const uint8_t *_srcV, \
+                                          int width, uint32_t *unused) \
+{ \
+    int i; \
+    const uint16_t *srcU = (const uint16_t*)_srcU; \
+    const uint16_t *srcV = (const uint16_t*)_srcV; \
+    for (i = 0; i < width; i++) { \
+        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
+        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
+    } \
+} \
+\
+static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, int width, uint32_t *unused) \
+{ \
+    int i; \
+    const uint16_t *srcY = (const uint16_t*)_srcY; \
+    for (i = 0; i < width; i++) \
+        dstY[i] = rfunc(&srcY[i])>>(depth-8); \
+} \
+
+YUV_NBPS( 9, LE, AV_RL16)
+YUV_NBPS( 9, BE, AV_RB16)
+YUV_NBPS(10, LE, AV_RL16)
+YUV_NBPS(10, BE, AV_RB16)
+
+static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
+                              int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        int b= src[i*3+0];
+        int g= src[i*3+1];
+        int r= src[i*3+2];
+
+        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+    }
+}
+
+static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                               const uint8_t *src2, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        int b= src1[3*i + 0];
+        int g= src1[3*i + 1];
+        int r= src1[3*i + 2];
+
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+    }
+    assert(src1 == src2);
+}
+
+static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                                    const uint8_t *src2, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        int b= src1[6*i + 0] + src1[6*i + 3];
+        int g= src1[6*i + 1] + src1[6*i + 4];
+        int r= src1[6*i + 2] + src1[6*i + 5];
+
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+    }
+    assert(src1 == src2);
+}
+
+static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
+                              uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        int r= src[i*3+0];
+        int g= src[i*3+1];
+        int b= src[i*3+2];
+
+        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+    }
+}
+
+static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                               const uint8_t *src2, int width, uint32_t *unused)
+{
+    int i;
+    assert(src1==src2);
+    for (i=0; i<width; i++) {
+        int r= src1[3*i + 0];
+        int g= src1[3*i + 1];
+        int b= src1[3*i + 2];
+
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+    }
+}
+
+static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                                    const uint8_t *src2, int width, uint32_t *unused)
+{
+    int i;
+    assert(src1==src2);
+    for (i=0; i<width; i++) {
+        int r= src1[6*i + 0] + src1[6*i + 3];
+        int g= src1[6*i + 1] + src1[6*i + 4];
+        int b= src1[6*i + 2] + src1[6*i + 5];
+
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+    }
+}
+
+
+// bilinear / bicubic scaling
+static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
+                            int srcW, int xInc,
+                            const int16_t *filter, const int16_t *filterPos,
+                            int filterSize)
+{
+    int i;
+    for (i=0; i<dstW; i++) {
+        int j;
+        int srcPos= filterPos[i];
+        int val=0;
+        for (j=0; j<filterSize; j++) {
+            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
+        }
+        //filter += hFilterSize;
+        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
+        //dst[i] = val>>7;
+    }
+}
+
+//FIXME all pal and rgb srcFormats could do this convertion as well
+//FIXME all scalers more complex than bilinear could do half of this transform
+static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
+        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
+    }
+}
+static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
+        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
+    }
+}
+static void lumRangeToJpeg_c(uint16_t *dst, int width)
+{
+    int i;
+    for (i = 0; i < width; i++)
+        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
+}
+static void lumRangeFromJpeg_c(uint16_t *dst, int width)
+{
+    int i;
+    for (i = 0; i < width; i++)
+        dst[i] = (dst[i]*14071 + 33561947)>>14;
+}
+
+static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
+                                  const uint8_t *src, int srcW, int xInc)
+{
+    int i;
+    unsigned int xpos=0;
+    for (i=0;i<dstWidth;i++) {
+        register unsigned int xx=xpos>>16;
+        register unsigned int xalpha=(xpos&0xFFFF)>>9;
+        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
+        xpos+=xInc;
+    }
+}
+
+      // *** horizontal scale Y line to temp buffer
+static inline void hyscale_c(SwsContext *c, uint16_t *dst, int dstWidth,
+                             const uint8_t *src, int srcW, int xInc,
+                             const int16_t *hLumFilter,
+                             const int16_t *hLumFilterPos, int hLumFilterSize,
+                             uint8_t *formatConvBuffer,
+                             uint32_t *pal, int isAlpha)
+{
+    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
+    void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
+
+    src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
+
+    if (toYV12) {
+        toYV12(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+
+    if (!c->hyscale_fast) {
+        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
+    } else { // fast bilinear upscale / crap downscale
+        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
+    }
+
+    if (convertRange)
+        convertRange(dst, dstWidth);
+}
+
+static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
+                                  int dstWidth, const uint8_t *src1,
+                                  const uint8_t *src2, int srcW, int xInc)
+{
+    int i;
+    unsigned int xpos=0;
+    for (i=0;i<dstWidth;i++) {
+        register unsigned int xx=xpos>>16;
+        register unsigned int xalpha=(xpos&0xFFFF)>>9;
+        dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
+        dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
+        xpos+=xInc;
+    }
+}
+
+inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int srcW, int xInc, const int16_t *hChrFilter,
+                             const int16_t *hChrFilterPos, int hChrFilterSize,
+                             uint8_t *formatConvBuffer, uint32_t *pal)
+{
+
+    src1 += c->chrSrcOffset;
+    src2 += c->chrSrcOffset;
+
+    if (c->chrToYV12) {
+        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
+        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= buf2;
+    }
+
+    if (!c->hcscale_fast) {
+        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+    } else { // fast bilinear upscale / crap downscale
+        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
+    }
+
+    if (c->chrConvertRange)
+        c->chrConvertRange(dst1, dst2, dstWidth);
+}
+
+#define DEBUG_SWSCALE_BUFFERS 0
+#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
+
+static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
+                     int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    /* load a few things into local vars to make the code more readable? and faster */
+    const int srcW= c->srcW;
+    const int dstW= c->dstW;
+    const int dstH= c->dstH;
+    const int chrDstW= c->chrDstW;
+    const int chrSrcW= c->chrSrcW;
+    const int lumXInc= c->lumXInc;
+    const int chrXInc= c->chrXInc;
+    const enum PixelFormat dstFormat= c->dstFormat;
+    const int flags= c->flags;
+    int16_t *vLumFilterPos= c->vLumFilterPos;
+    int16_t *vChrFilterPos= c->vChrFilterPos;
+    int16_t *hLumFilterPos= c->hLumFilterPos;
+    int16_t *hChrFilterPos= c->hChrFilterPos;
+    int16_t *vLumFilter= c->vLumFilter;
+    int16_t *vChrFilter= c->vChrFilter;
+    int16_t *hLumFilter= c->hLumFilter;
+    int16_t *hChrFilter= c->hChrFilter;
+    int32_t *lumMmxFilter= c->lumMmxFilter;
+    int32_t *chrMmxFilter= c->chrMmxFilter;
+    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
+    const int vLumFilterSize= c->vLumFilterSize;
+    const int vChrFilterSize= c->vChrFilterSize;
+    const int hLumFilterSize= c->hLumFilterSize;
+    const int hChrFilterSize= c->hChrFilterSize;
+    int16_t **lumPixBuf= c->lumPixBuf;
+    int16_t **chrUPixBuf= c->chrUPixBuf;
+    int16_t **chrVPixBuf= c->chrVPixBuf;
+    int16_t **alpPixBuf= c->alpPixBuf;
+    const int vLumBufSize= c->vLumBufSize;
+    const int vChrBufSize= c->vChrBufSize;
+    uint8_t *formatConvBuffer= c->formatConvBuffer;
+    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
+    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
+    int lastDstY;
+    uint32_t *pal=c->pal_yuv;
+
+    /* vars which will change and which we need to store back in the context */
+    int dstY= c->dstY;
+    int lumBufIndex= c->lumBufIndex;
+    int chrBufIndex= c->chrBufIndex;
+    int lastInLumBuf= c->lastInLumBuf;
+    int lastInChrBuf= c->lastInChrBuf;
+
+    if (isPacked(c->srcFormat)) {
+        src[0]=
+        src[1]=
+        src[2]=
+        src[3]= src[0];
+        srcStride[0]=
+        srcStride[1]=
+        srcStride[2]=
+        srcStride[3]= srcStride[0];
+    }
+    srcStride[1]<<= c->vChrDrop;
+    srcStride[2]<<= c->vChrDrop;
+
+    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
+                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
+                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
+    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
+                   srcSliceY,    srcSliceH,    dstY,    dstH);
+    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
+                   vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
+
+    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
+        static int warnedAlready=0; //FIXME move this into the context perhaps
+        if (flags & SWS_PRINT_INFO && !warnedAlready) {
+            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
+                   "         ->cannot do aligned memory accesses anymore\n");
+            warnedAlready=1;
+        }
+    }
+
+    /* Note the user might start scaling the picture in the middle so this
+       will not get executed. This is not really intended but works
+       currently, so people might do it. */
+    if (srcSliceY ==0) {
+        lumBufIndex=-1;
+        chrBufIndex=-1;
+        dstY=0;
+        lastInLumBuf= -1;
+        lastInChrBuf= -1;
+    }
+
+    lastDstY= dstY;
+
+    for (;dstY < dstH; dstY++) {
+        unsigned char *dest =dst[0]+dstStride[0]*dstY;
+        const int chrDstY= dstY>>c->chrDstVSubSample;
+        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
+        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
+        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
+
+        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
+        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
+        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
+        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
+        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
+        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
+        int enough_lines;
+
+        //handle holes (FAST_BILINEAR & weird filters)
+        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
+        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
+        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
+        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
+
+        DEBUG_BUFFERS("dstY: %d\n", dstY);
+        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
+                         firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
+        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
+                         firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
+
+        // Do we have enough lines in this slice to output the dstY line
+        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
+
+        if (!enough_lines) {
+            lastLumSrcY = srcSliceY + srcSliceH - 1;
+            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
+            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
+                                            lastLumSrcY, lastChrSrcY);
+        }
+
+        //Do horizontal scaling
+        while(lastInLumBuf < lastLumSrcY) {
+            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
+            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
+            lumBufIndex++;
+            assert(lumBufIndex < 2*vLumBufSize);
+            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
+            assert(lastInLumBuf + 1 - srcSliceY >= 0);
+            hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
+                      hLumFilter, hLumFilterPos, hLumFilterSize,
+                      formatConvBuffer,
+                      pal, 0);
+            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
+                hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
+                          lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
+                          formatConvBuffer,
+                          pal, 1);
+            lastInLumBuf++;
+            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
+                               lumBufIndex,    lastInLumBuf);
+        }
+        while(lastInChrBuf < lastChrSrcY) {
+            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
+            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
+            chrBufIndex++;
+            assert(chrBufIndex < 2*vChrBufSize);
+            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
+            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
+            //FIXME replace parameters through context struct (some at least)
+
+            if (c->needs_hcscale)
+                hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
+                          chrDstW, src1, src2, chrSrcW, chrXInc,
+                          hChrFilter, hChrFilterPos, hChrFilterSize,
+                          formatConvBuffer, pal);
+            lastInChrBuf++;
+            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
+                               chrBufIndex,    lastInChrBuf);
+        }
+        //wrap buf index around to stay inside the ring buffer
+        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
+        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
+        if (!enough_lines)
+            break; //we can't output a dstY line so let's try with the next slice
+
+#if HAVE_MMX
+        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
+#endif
+        if (dstY < dstH-2) {
+            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
+                c->yuv2nv12X(c,
+                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                             dest, uDest, dstW, chrDstW, dstFormat);
+            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
+                    yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
+                                  chrVSrcPtr, vChrFilterSize,
+                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
+                                  (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                                  dstFormat);
+                } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
+                    const int16_t *lumBuf = lumSrcPtr[0];
+                    const int16_t *chrUBuf= chrUSrcPtr[0];
+                    const int16_t *chrVBuf= chrVSrcPtr[0];
+                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
+                    c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
+                                uDest, vDest, aDest, dstW, chrDstW);
+                } else { //General YV12
+                    c->yuv2yuvX(c,
+                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
+                                chrVSrcPtr, vChrFilterSize,
+                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
+                }
+            } else {
+                assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
+                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
+                if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
+                    int chrAlpha= vChrFilter[2*dstY+1];
+                    if(flags & SWS_FULL_CHR_H_INT) {
+                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
+                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
+                                         chrVSrcPtr, vChrFilterSize,
+                                         alpSrcPtr, dest, dstW, dstY);
+                    } else {
+                        c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
+                                       *chrVSrcPtr, *(chrVSrcPtr+1),
+                                       alpPixBuf ? *alpSrcPtr : NULL,
+                                       dest, dstW, chrAlpha, dstFormat, flags, dstY);
+                    }
+                } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
+                    int lumAlpha= vLumFilter[2*dstY+1];
+                    int chrAlpha= vChrFilter[2*dstY+1];
+                    lumMmxFilter[2]=
+                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
+                    chrMmxFilter[2]=
+                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
+                    if(flags & SWS_FULL_CHR_H_INT) {
+                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
+                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                         alpSrcPtr, dest, dstW, dstY);
+                    } else {
+                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
+                                       *chrVSrcPtr, *(chrVSrcPtr+1),
+                                       alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
+                                       dest, dstW, lumAlpha, chrAlpha, dstY);
+                    }
+                } else { //general RGB
+                    if(flags & SWS_FULL_CHR_H_INT) {
+                        yuv2rgbXinC_full(c,
+                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                         alpSrcPtr, dest, dstW, dstY);
+                    } else {
+                        c->yuv2packedX(c,
+                                       vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                       vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                       alpSrcPtr, dest, dstW, dstY);
+                    }
+                }
+            }
+        } else { // hmm looks like we can't use MMX here without overwriting this array's tail
+            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+            const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
+                yuv2nv12XinC(
+                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                             dest, uDest, dstW, chrDstW, dstFormat);
+            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
+                    yuv2yuvX16inC(
+                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                                  dstFormat);
+                } else {
+                    yuv2yuvXinC(
+                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
+                }
+            } else {
+                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
+                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
+                if(flags & SWS_FULL_CHR_H_INT) {
+                    yuv2rgbXinC_full(c,
+                                     vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                     vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                     alpSrcPtr, dest, dstW, dstY);
+                } else {
+                    yuv2packedXinC(c,
+                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                   vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                   alpSrcPtr, dest, dstW, dstY);
+                }
+            }
+        }
+    }
+
+    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
+        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
+
+#if HAVE_MMX2
+    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
+        __asm__ volatile("sfence":::"memory");
+#endif
+    emms_c();
+
+    /* store changed local vars back in the context */
+    c->dstY= dstY;
+    c->lumBufIndex= lumBufIndex;
+    c->chrBufIndex= chrBufIndex;
+    c->lastInLumBuf= lastInLumBuf;
+    c->lastInChrBuf= lastInChrBuf;
+
+    return dstY - lastDstY;
+}
+
+static void sws_init_swScale_c(SwsContext *c)
+{
+    enum PixelFormat srcFormat = c->srcFormat;
+    /* Fill in c's C callbacks and per-source-format fields; output writers come first. */
+    c->yuv2nv12X    = yuv2nv12X_c;
+    c->yuv2yuv1     = yuv2yuv1_c;
+    c->yuv2yuvX     = yuv2yuvX_c;
+    c->yuv2packed1  = yuv2packed1_c;
+    c->yuv2packed2  = yuv2packed2_c;
+    c->yuv2packedX  = yuv2packedX_c;
+
+    c->hScale       = hScale_c; // filter-table driven horizontal scaler
+
+    if (c->flags & SWS_FAST_BILINEAR) // hyscale_c() uses hyscale_fast instead of hScale when it is set
+    {
+        c->hyscale_fast = hyscale_fast_c;
+        c->hcscale_fast = hcscale_fast_c;
+    }
+    /* Chroma input converter; source formats matched by no switch below leave it NULL. */
+    c->chrToYV12 = NULL;
+    switch(srcFormat) {
+        case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
+        case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
+        case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
+        case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
+        case PIX_FMT_RGB8     :
+        case PIX_FMT_BGR8     :
+        case PIX_FMT_PAL8     :
+        case PIX_FMT_BGR4_BYTE:
+        case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
+        case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
+        case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
+        case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
+        case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
+        case PIX_FMT_YUV420P16BE:
+        case PIX_FMT_YUV422P16BE:
+        case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
+        case PIX_FMT_YUV420P16LE:
+        case PIX_FMT_YUV422P16LE:
+        case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
+    }
+    if (c->chrSrcHSubSample) { // the *24ToUV_half_c helpers combine each pair of adjacent pixels
+        switch(srcFormat) {
+        case PIX_FMT_RGB48BE:
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
+        case PIX_FMT_BGR48BE:
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
+        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half;  break;
+        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
+        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
+        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
+        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
+        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half;  break;
+        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
+        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c; break;
+        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
+        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
+        }
+    } else {
+        switch(srcFormat) {
+        case PIX_FMT_RGB48BE:
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
+        case PIX_FMT_BGR48BE:
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
+        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV;  break;
+        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
+        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
+        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
+        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
+        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV;  break;
+        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
+        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c; break;
+        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
+        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
+        }
+    }
+    /* Luma/alpha input converters; when one is NULL, hyscale_c() scales straight from src. */
+    c->lumToYV12 = NULL;
+    c->alpToYV12 = NULL;
+    switch (srcFormat) {
+    case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
+    case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
+    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
+    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
+    case PIX_FMT_YUYV422  :
+    case PIX_FMT_YUV420P16BE:
+    case PIX_FMT_YUV422P16BE:
+    case PIX_FMT_YUV444P16BE:
+    case PIX_FMT_Y400A    :
+    case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break; // copies src[2*i]
+    case PIX_FMT_UYVY422  :
+    case PIX_FMT_YUV420P16LE:
+    case PIX_FMT_YUV422P16LE:
+    case PIX_FMT_YUV444P16LE:
+    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break; // copies src[2*i+1]
+    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
+    case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY; break;
+    case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY; break;
+    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
+    case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY; break;
+    case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY; break;
+    case PIX_FMT_RGB8     :
+    case PIX_FMT_BGR8     :
+    case PIX_FMT_PAL8     :
+    case PIX_FMT_BGR4_BYTE:
+    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
+    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
+    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
+    case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY;  break;
+    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
+    case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY;  break;
+    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
+    case PIX_FMT_RGB48BE:
+    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
+    case PIX_FMT_BGR48BE:
+    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break;
+    }
+    if (c->alpPixBuf) {
+        switch (srcFormat) {
+        case PIX_FMT_RGB32  :
+        case PIX_FMT_RGB32_1:
+        case PIX_FMT_BGR32  :
+        case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
+        case PIX_FMT_Y400A  : c->alpToYV12 = yuy2ToY_c; break; // with alpSrcOffset 1 below: src[2*i+1]
+        }
+    }
+    /* lum/alp offsets are added to the uint8_t src pointer in hyscale_c(); other formats are left as is. */
+    switch (srcFormat) {
+    case PIX_FMT_Y400A  :
+        c->alpSrcOffset = 1;
+        break;
+    case PIX_FMT_RGB32  :
+    case PIX_FMT_BGR32  :
+        c->alpSrcOffset = 3;
+        break;
+    case PIX_FMT_RGB48LE:
+    case PIX_FMT_BGR48LE:
+        c->lumSrcOffset = 1;
+        c->chrSrcOffset = 1;
+        c->alpSrcOffset = 1;
+        break;
+    }
+    /* Range converters are installed only when the ranges differ and the destination is not RGB. */
+    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
+        if (c->srcRange) {
+            c->lumConvertRange = lumRangeFromJpeg_c;
+            c->chrConvertRange = chrRangeFromJpeg_c;
+        } else {
+            c->lumConvertRange = lumRangeToJpeg_c;
+            c->chrConvertRange = chrRangeToJpeg_c;
+        }
+    }
+    /* Chroma is scaled horizontally unless either side is gray or the source is monochrome. */
+    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
+          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
+        c->needs_hcscale = 1;
+}
 
 SwsFunc ff_getSwsFunc(SwsContext *c)
 {
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
deleted file mode 100644
index 383f01888f..0000000000
--- a/libswscale/swscale_template.c
+++ /dev/null
@@ -1,950 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
-                              const int16_t **lumSrc, int lumFilterSize,
-                              const int16_t *chrFilter, const int16_t **chrUSrc,
-                              const int16_t **chrVSrc,
-                              int chrFilterSize, const int16_t **alpSrc,
-                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, int dstW, int chrDstW)
-{
-    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
-                chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
-}
-
-static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
-                               const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter, const int16_t **chrUSrc,
-                               const int16_t **chrVSrc,
-                               int chrFilterSize, uint8_t *dest, uint8_t *uDest,
-                               int dstW, int chrDstW, enum PixelFormat dstFormat)
-{
-    yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
-                 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                 dest, uDest, dstW, chrDstW, dstFormat);
-}
-
-static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
-                              const int16_t *chrUSrc, const int16_t *chrVSrc,
-                              const int16_t *alpSrc,
-                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, int dstW, int chrDstW)
-{
-    int i;
-    for (i=0; i<dstW; i++) {
-        int val= (lumSrc[i]+64)>>7;
-        dest[i]= av_clip_uint8(val);
-    }
-
-    if (uDest)
-        for (i=0; i<chrDstW; i++) {
-            int u=(chrUSrc[i]+64)>>7;
-            int v=(chrVSrc[i]+64)>>7;
-            uDest[i]= av_clip_uint8(u);
-            vDest[i]= av_clip_uint8(v);
-        }
-
-    if (CONFIG_SWSCALE_ALPHA && aDest)
-        for (i=0; i<dstW; i++) {
-            int val= (alpSrc[i]+64)>>7;
-            aDest[i]= av_clip_uint8(val);
-        }
-}
-
-
-/**
- * vertical scale YV12 to RGB
- */
-static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
-                                 const int16_t **lumSrc, int lumFilterSize,
-                                 const int16_t *chrFilter, const int16_t **chrUSrc,
-                                 const int16_t **chrVSrc,
-                                 int chrFilterSize, const int16_t **alpSrc,
-                                 uint8_t *dest, int dstW, int dstY)
-{
-        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                       chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                       alpSrc, dest, dstW, dstY);
-}
-
-/**
- * vertical bilinear scale YV12 to RGB
- */
-static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *buf1, const uint16_t *ubuf0,
-                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                 const uint16_t *vbuf1, const uint16_t *abuf0,
-                                 const uint16_t *abuf1, uint8_t *dest, int dstW,
-                                 int yalpha, int uvalpha, int y)
-{
-    int  yalpha1=4095- yalpha;
-    int uvalpha1=4095-uvalpha;
-    int i;
-
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
-}
-
-/**
- * YV12 to RGB without scaling or interpolating
- */
-static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                 const uint16_t *abuf0, uint8_t *dest, int dstW,
-                                 int uvalpha, enum PixelFormat dstFormat,
-                                 int flags, int y)
-{
-    const int yalpha1=0;
-    int i;
-
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    const int yalpha= 4096; //FIXME ...
-
-    if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
-    } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
-    }
-}
-
-//FIXME yuy2* can read up to 7 samples too much
-
-static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
-                             uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++)
-        dst[i]= src[2*i];
-}
-
-static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                              const uint8_t *src2, int width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dstU[i]= src1[4*i + 1];
-        dstV[i]= src1[4*i + 3];
-    }
-    assert(src1 == src2);
-}
-
-static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                            const uint8_t *src2, int width, uint32_t *unused)
-{
-    int i;
-    // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
-    // we need to skip each second pixel. Same for BEToUV.
-    for (i=0; i<width; i++) {
-        dstU[i]= src1[2*i + 1];
-        dstV[i]= src2[2*i + 1];
-    }
-}
-
-/* This is almost identical to the previous, end exists only because
- * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
-                             uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++)
-        dst[i]= src[2*i+1];
-}
-
-static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                              const uint8_t *src2, int width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dstU[i]= src1[4*i + 0];
-        dstV[i]= src1[4*i + 2];
-    }
-    assert(src1 == src2);
-}
-
-static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                            const uint8_t *src2, int width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dstU[i]= src1[2*i];
-        dstV[i]= src2[2*i];
-    }
-}
-
-static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
-                              const uint8_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        dst1[i] = src[2*i+0];
-        dst2[i] = src[2*i+1];
-    }
-}
-
-static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                              const uint8_t *src1, const uint8_t *src2,
-                              int width, uint32_t *unused)
-{
-    nvXXtoUV_c(dstU, dstV, src1, width);
-}
-
-static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                              const uint8_t *src1, const uint8_t *src2,
-                              int width, uint32_t *unused)
-{
-    nvXXtoUV_c(dstV, dstU, src1, width);
-}
-
-// FIXME Maybe dither instead.
-#define YUV_NBPS(depth, endianness, rfunc) \
-static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
-                                          const uint8_t *_srcU, const uint8_t *_srcV, \
-                                          int width, uint32_t *unused) \
-{ \
-    int i; \
-    const uint16_t *srcU = (const uint16_t*)_srcU; \
-    const uint16_t *srcV = (const uint16_t*)_srcV; \
-    for (i = 0; i < width; i++) { \
-        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
-        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
-    } \
-} \
-\
-static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, int width, uint32_t *unused) \
-{ \
-    int i; \
-    const uint16_t *srcY = (const uint16_t*)_srcY; \
-    for (i = 0; i < width; i++) \
-        dstY[i] = rfunc(&srcY[i])>>(depth-8); \
-} \
-
-YUV_NBPS( 9, LE, AV_RL16)
-YUV_NBPS( 9, BE, AV_RB16)
-YUV_NBPS(10, LE, AV_RL16)
-YUV_NBPS(10, BE, AV_RB16)
-
-static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
-                              int width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        int b= src[i*3+0];
-        int g= src[i*3+1];
-        int r= src[i*3+2];
-
-        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-    }
-}
-
-static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                               const uint8_t *src2, int width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        int b= src1[3*i + 0];
-        int g= src1[3*i + 1];
-        int r= src1[3*i + 2];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-    }
-    assert(src1 == src2);
-}
-
-static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                                    const uint8_t *src2, int width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        int b= src1[6*i + 0] + src1[6*i + 3];
-        int g= src1[6*i + 1] + src1[6*i + 4];
-        int r= src1[6*i + 2] + src1[6*i + 5];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-    }
-    assert(src1 == src2);
-}
-
-static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
-                              uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        int r= src[i*3+0];
-        int g= src[i*3+1];
-        int b= src[i*3+2];
-
-        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-    }
-}
-
-static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                               const uint8_t *src2, int width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i=0; i<width; i++) {
-        int r= src1[3*i + 0];
-        int g= src1[3*i + 1];
-        int b= src1[3*i + 2];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-    }
-}
-
-static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                                    const uint8_t *src2, int width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i=0; i<width; i++) {
-        int r= src1[6*i + 0] + src1[6*i + 3];
-        int g= src1[6*i + 1] + src1[6*i + 4];
-        int b= src1[6*i + 2] + src1[6*i + 5];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-    }
-}
-
-
-// bilinear / bicubic scaling
-static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
-                            int srcW, int xInc,
-                            const int16_t *filter, const int16_t *filterPos,
-                            int filterSize)
-{
-    int i;
-    for (i=0; i<dstW; i++) {
-        int j;
-        int srcPos= filterPos[i];
-        int val=0;
-        for (j=0; j<filterSize; j++) {
-            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
-        }
-        //filter += hFilterSize;
-        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
-        //dst[i] = val>>7;
-    }
-}
-
-//FIXME all pal and rgb srcFormats could do this convertion as well
-//FIXME all scalers more complex than bilinear could do half of this transform
-static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
-        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
-    }
-}
-static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
-        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
-    }
-}
-static void lumRangeToJpeg_c(uint16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++)
-        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
-}
-static void lumRangeFromJpeg_c(uint16_t *dst, int width)
-{
-    int i;
-    for (i = 0; i < width; i++)
-        dst[i] = (dst[i]*14071 + 33561947)>>14;
-}
-
-static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
-                                  const uint8_t *src, int srcW, int xInc)
-{
-    int i;
-    unsigned int xpos=0;
-    for (i=0;i<dstWidth;i++) {
-        register unsigned int xx=xpos>>16;
-        register unsigned int xalpha=(xpos&0xFFFF)>>9;
-        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
-        xpos+=xInc;
-    }
-}
-
-      // *** horizontal scale Y line to temp buffer
-static inline void hyscale_c(SwsContext *c, uint16_t *dst, int dstWidth,
-                             const uint8_t *src, int srcW, int xInc,
-                             const int16_t *hLumFilter,
-                             const int16_t *hLumFilterPos, int hLumFilterSize,
-                             uint8_t *formatConvBuffer,
-                             uint32_t *pal, int isAlpha)
-{
-    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
-    void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
-
-    src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
-
-    if (toYV12) {
-        toYV12(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-
-    if (!c->hyscale_fast) {
-        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
-    } else { // fast bilinear upscale / crap downscale
-        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
-    }
-
-    if (convertRange)
-        convertRange(dst, dstWidth);
-}
-
-static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                  int dstWidth, const uint8_t *src1,
-                                  const uint8_t *src2, int srcW, int xInc)
-{
-    int i;
-    unsigned int xpos=0;
-    for (i=0;i<dstWidth;i++) {
-        register unsigned int xx=xpos>>16;
-        register unsigned int xalpha=(xpos&0xFFFF)>>9;
-        dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
-        dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
-        xpos+=xInc;
-    }
-}
-
-inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
-                             const uint8_t *src1, const uint8_t *src2,
-                             int srcW, int xInc, const int16_t *hChrFilter,
-                             const int16_t *hChrFilterPos, int hChrFilterSize,
-                             uint8_t *formatConvBuffer, uint32_t *pal)
-{
-
-    src1 += c->chrSrcOffset;
-    src2 += c->chrSrcOffset;
-
-    if (c->chrToYV12) {
-        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
-        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= buf2;
-    }
-
-    if (!c->hcscale_fast) {
-        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-    } else { // fast bilinear upscale / crap downscale
-        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
-    }
-
-    if (c->chrConvertRange)
-        c->chrConvertRange(dst1, dst2, dstWidth);
-}
-
-#define DEBUG_SWSCALE_BUFFERS 0
-#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
-
-static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
-                     int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    /* load a few things into local vars to make the code more readable? and faster */
-    const int srcW= c->srcW;
-    const int dstW= c->dstW;
-    const int dstH= c->dstH;
-    const int chrDstW= c->chrDstW;
-    const int chrSrcW= c->chrSrcW;
-    const int lumXInc= c->lumXInc;
-    const int chrXInc= c->chrXInc;
-    const enum PixelFormat dstFormat= c->dstFormat;
-    const int flags= c->flags;
-    int16_t *vLumFilterPos= c->vLumFilterPos;
-    int16_t *vChrFilterPos= c->vChrFilterPos;
-    int16_t *hLumFilterPos= c->hLumFilterPos;
-    int16_t *hChrFilterPos= c->hChrFilterPos;
-    int16_t *vLumFilter= c->vLumFilter;
-    int16_t *vChrFilter= c->vChrFilter;
-    int16_t *hLumFilter= c->hLumFilter;
-    int16_t *hChrFilter= c->hChrFilter;
-    int32_t *lumMmxFilter= c->lumMmxFilter;
-    int32_t *chrMmxFilter= c->chrMmxFilter;
-    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
-    const int vLumFilterSize= c->vLumFilterSize;
-    const int vChrFilterSize= c->vChrFilterSize;
-    const int hLumFilterSize= c->hLumFilterSize;
-    const int hChrFilterSize= c->hChrFilterSize;
-    int16_t **lumPixBuf= c->lumPixBuf;
-    int16_t **chrUPixBuf= c->chrUPixBuf;
-    int16_t **chrVPixBuf= c->chrVPixBuf;
-    int16_t **alpPixBuf= c->alpPixBuf;
-    const int vLumBufSize= c->vLumBufSize;
-    const int vChrBufSize= c->vChrBufSize;
-    uint8_t *formatConvBuffer= c->formatConvBuffer;
-    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
-    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
-    int lastDstY;
-    uint32_t *pal=c->pal_yuv;
-
-    /* vars which will change and which we need to store back in the context */
-    int dstY= c->dstY;
-    int lumBufIndex= c->lumBufIndex;
-    int chrBufIndex= c->chrBufIndex;
-    int lastInLumBuf= c->lastInLumBuf;
-    int lastInChrBuf= c->lastInChrBuf;
-
-    if (isPacked(c->srcFormat)) {
-        src[0]=
-        src[1]=
-        src[2]=
-        src[3]= src[0];
-        srcStride[0]=
-        srcStride[1]=
-        srcStride[2]=
-        srcStride[3]= srcStride[0];
-    }
-    srcStride[1]<<= c->vChrDrop;
-    srcStride[2]<<= c->vChrDrop;
-
-    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
-                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
-                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
-    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
-                   srcSliceY,    srcSliceH,    dstY,    dstH);
-    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
-                   vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
-
-    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
-        static int warnedAlready=0; //FIXME move this into the context perhaps
-        if (flags & SWS_PRINT_INFO && !warnedAlready) {
-            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
-                   "         ->cannot do aligned memory accesses anymore\n");
-            warnedAlready=1;
-        }
-    }
-
-    /* Note the user might start scaling the picture in the middle so this
-       will not get executed. This is not really intended but works
-       currently, so people might do it. */
-    if (srcSliceY ==0) {
-        lumBufIndex=-1;
-        chrBufIndex=-1;
-        dstY=0;
-        lastInLumBuf= -1;
-        lastInChrBuf= -1;
-    }
-
-    lastDstY= dstY;
-
-    for (;dstY < dstH; dstY++) {
-        unsigned char *dest =dst[0]+dstStride[0]*dstY;
-        const int chrDstY= dstY>>c->chrDstVSubSample;
-        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
-        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
-        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
-
-        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
-        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
-        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
-        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
-        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
-        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
-        int enough_lines;
-
-        //handle holes (FAST_BILINEAR & weird filters)
-        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
-        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
-        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
-        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
-
-        DEBUG_BUFFERS("dstY: %d\n", dstY);
-        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
-                         firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
-        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
-                         firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
-
-        // Do we have enough lines in this slice to output the dstY line
-        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
-
-        if (!enough_lines) {
-            lastLumSrcY = srcSliceY + srcSliceH - 1;
-            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
-            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
-                                            lastLumSrcY, lastChrSrcY);
-        }
-
-        //Do horizontal scaling
-        while(lastInLumBuf < lastLumSrcY) {
-            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
-            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
-            lumBufIndex++;
-            assert(lumBufIndex < 2*vLumBufSize);
-            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
-            assert(lastInLumBuf + 1 - srcSliceY >= 0);
-            hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
-                      hLumFilter, hLumFilterPos, hLumFilterSize,
-                      formatConvBuffer,
-                      pal, 0);
-            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
-                hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
-                          lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
-                          formatConvBuffer,
-                          pal, 1);
-            lastInLumBuf++;
-            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
-                               lumBufIndex,    lastInLumBuf);
-        }
-        while(lastInChrBuf < lastChrSrcY) {
-            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
-            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
-            chrBufIndex++;
-            assert(chrBufIndex < 2*vChrBufSize);
-            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
-            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
-            //FIXME replace parameters through context struct (some at least)
-
-            if (c->needs_hcscale)
-                hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
-                          chrDstW, src1, src2, chrSrcW, chrXInc,
-                          hChrFilter, hChrFilterPos, hChrFilterSize,
-                          formatConvBuffer, pal);
-            lastInChrBuf++;
-            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
-                               chrBufIndex,    lastInChrBuf);
-        }
-        //wrap buf index around to stay inside the ring buffer
-        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
-        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
-        if (!enough_lines)
-            break; //we can't output a dstY line so let's try with the next slice
-
-#if HAVE_MMX
-        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
-#endif
-        if (dstY < dstH-2) {
-            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                c->yuv2nv12X(c,
-                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
-            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
-                                  chrVSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
-                                  (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
-                                  dstFormat);
-                } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
-                    const int16_t *lumBuf = lumSrcPtr[0];
-                    const int16_t *chrUBuf= chrUSrcPtr[0];
-                    const int16_t *chrVBuf= chrVSrcPtr[0];
-                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
-                    c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
-                                uDest, vDest, aDest, dstW, chrDstW);
-                } else { //General YV12
-                    c->yuv2yuvX(c,
-                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
-                                chrVSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
-                }
-            } else {
-                assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
-                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
-                if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
-                    int chrAlpha= vChrFilter[2*dstY+1];
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
-                                         chrVSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
-                                       *chrVSrcPtr, *(chrVSrcPtr+1),
-                                       alpPixBuf ? *alpSrcPtr : NULL,
-                                       dest, dstW, chrAlpha, dstFormat, flags, dstY);
-                    }
-                } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
-                    int lumAlpha= vLumFilter[2*dstY+1];
-                    int chrAlpha= vChrFilter[2*dstY+1];
-                    lumMmxFilter[2]=
-                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
-                    chrMmxFilter[2]=
-                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
-                                       *chrVSrcPtr, *(chrVSrcPtr+1),
-                                       alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
-                                       dest, dstW, lumAlpha, chrAlpha, dstY);
-                    }
-                } else { //general RGB
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c,
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packedX(c,
-                                       vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                       vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                       alpSrcPtr, dest, dstW, dstY);
-                    }
-                }
-            }
-        } else { // hmm looks like we can't use MMX here without overwriting this array's tail
-            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                yuv2nv12XinC(
-                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
-            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16inC(
-                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
-                                  dstFormat);
-                } else {
-                    yuv2yuvXinC(
-                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
-                }
-            } else {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
-                if(flags & SWS_FULL_CHR_H_INT) {
-                    yuv2rgbXinC_full(c,
-                                     vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                     vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                     alpSrcPtr, dest, dstW, dstY);
-                } else {
-                    yuv2packedXinC(c,
-                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                   vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                   alpSrcPtr, dest, dstW, dstY);
-                }
-            }
-        }
-    }
-
-    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
-        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
-
-#if HAVE_MMX2
-    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
-        __asm__ volatile("sfence":::"memory");
-#endif
-    emms_c();
-
-    /* store changed local vars back in the context */
-    c->dstY= dstY;
-    c->lumBufIndex= lumBufIndex;
-    c->chrBufIndex= chrBufIndex;
-    c->lastInLumBuf= lastInLumBuf;
-    c->lastInChrBuf= lastInChrBuf;
-
-    return dstY - lastDstY;
-}
-
-static void sws_init_swScale_c(SwsContext *c)
-{
-    enum PixelFormat srcFormat = c->srcFormat;
-
-    c->yuv2nv12X    = yuv2nv12X_c;
-    c->yuv2yuv1     = yuv2yuv1_c;
-    c->yuv2yuvX     = yuv2yuvX_c;
-    c->yuv2packed1  = yuv2packed1_c;
-    c->yuv2packed2  = yuv2packed2_c;
-    c->yuv2packedX  = yuv2packedX_c;
-
-    c->hScale       = hScale_c;
-
-    if (c->flags & SWS_FAST_BILINEAR)
-    {
-        c->hyscale_fast = hyscale_fast_c;
-        c->hcscale_fast = hcscale_fast_c;
-    }
-
-    c->chrToYV12 = NULL;
-    switch(srcFormat) {
-        case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
-        case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
-        case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
-        case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
-        case PIX_FMT_RGB8     :
-        case PIX_FMT_BGR8     :
-        case PIX_FMT_PAL8     :
-        case PIX_FMT_BGR4_BYTE:
-        case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
-        case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
-        case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
-        case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
-        case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
-        case PIX_FMT_YUV420P16BE:
-        case PIX_FMT_YUV422P16BE:
-        case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
-        case PIX_FMT_YUV420P16LE:
-        case PIX_FMT_YUV422P16LE:
-        case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
-    }
-    if (c->chrSrcHSubSample) {
-        switch(srcFormat) {
-        case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
-        case PIX_FMT_BGR48BE:
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
-        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half;  break;
-        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
-        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
-        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
-        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
-        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half;  break;
-        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c; break;
-        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
-        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
-        }
-    } else {
-        switch(srcFormat) {
-        case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
-        case PIX_FMT_BGR48BE:
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
-        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV;  break;
-        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
-        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
-        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
-        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
-        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV;  break;
-        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c; break;
-        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
-        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
-        }
-    }
-
-    c->lumToYV12 = NULL;
-    c->alpToYV12 = NULL;
-    switch (srcFormat) {
-    case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
-    case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
-    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
-    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
-    case PIX_FMT_YUYV422  :
-    case PIX_FMT_YUV420P16BE:
-    case PIX_FMT_YUV422P16BE:
-    case PIX_FMT_YUV444P16BE:
-    case PIX_FMT_Y400A    :
-    case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
-    case PIX_FMT_UYVY422  :
-    case PIX_FMT_YUV420P16LE:
-    case PIX_FMT_YUV422P16LE:
-    case PIX_FMT_YUV444P16LE:
-    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
-    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
-    case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY; break;
-    case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY; break;
-    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
-    case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY; break;
-    case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY; break;
-    case PIX_FMT_RGB8     :
-    case PIX_FMT_BGR8     :
-    case PIX_FMT_PAL8     :
-    case PIX_FMT_BGR4_BYTE:
-    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
-    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
-    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
-    case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY;  break;
-    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
-    case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY;  break;
-    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
-    case PIX_FMT_RGB48BE:
-    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
-    case PIX_FMT_BGR48BE:
-    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break;
-    }
-    if (c->alpPixBuf) {
-        switch (srcFormat) {
-        case PIX_FMT_RGB32  :
-        case PIX_FMT_RGB32_1:
-        case PIX_FMT_BGR32  :
-        case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
-        case PIX_FMT_Y400A  : c->alpToYV12 = yuy2ToY_c; break;
-        }
-    }
-
-    switch (srcFormat) {
-    case PIX_FMT_Y400A  :
-        c->alpSrcOffset = 1;
-        break;
-    case PIX_FMT_RGB32  :
-    case PIX_FMT_BGR32  :
-        c->alpSrcOffset = 3;
-        break;
-    case PIX_FMT_RGB48LE:
-    case PIX_FMT_BGR48LE:
-        c->lumSrcOffset = 1;
-        c->chrSrcOffset = 1;
-        c->alpSrcOffset = 1;
-        break;
-    }
-
-    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
-        if (c->srcRange) {
-            c->lumConvertRange = lumRangeFromJpeg_c;
-            c->chrConvertRange = chrRangeFromJpeg_c;
-        } else {
-            c->lumConvertRange = lumRangeToJpeg_c;
-            c->chrConvertRange = chrRangeToJpeg_c;
-        }
-    }
-
-    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
-          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
-        c->needs_hcscale = 1;
-}

From 2762ee30347afd3c1d2795a232c8b78d56a44b0f Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 22:34:13 -0700
Subject: [PATCH 562/830] swscale: cosmetics.

Remove duplicate "inC" and "_c" functions that do the same thing;
give each function that handles data and acts as a function pointer
a "_c" suffix; remove "_c" suffix from functions that are inherently
not optimizable. Remove inline keyword from functions that are only
used through function pointers.
---
 libswscale/swscale.c | 496 ++++++++++++++++++++-----------------------
 1 file changed, 236 insertions(+), 260 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 43d0d69055..56ceb13432 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -202,11 +202,14 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
 };
 #endif
 
-static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                                    const int16_t *chrFilter, const int16_t **chrUSrc,
-                                                    const int16_t **chrVSrc, int chrFilterSize,
-                                                    const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest,
-                                                    int dstW, int chrDstW, int big_endian, int output_bits)
+static av_always_inline void
+yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
+                      int lumFilterSize, const int16_t *chrFilter,
+                      const int16_t **chrUSrc, const int16_t **chrVSrc,
+                      int chrFilterSize, const int16_t **alpSrc,
+                      uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
+                      uint16_t *aDest, int dstW, int chrDstW,
+                      int big_endian, int output_bits)
 {
     //FIXME Optimize (just quickly written not optimized..)
     int i;
@@ -274,11 +277,11 @@ static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \
                               uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \
                               uint16_t *aDest, int dstW, int chrDstW) \
 { \
-    yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \
-                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                           alpSrc, \
-                           dest, uDest, vDest, aDest, \
-                           dstW, chrDstW, is_be, bits); \
+    yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
+                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                          alpSrc, \
+                          dest, uDest, vDest, aDest, \
+                          dstW, chrDstW, is_be, bits); \
 }
 yuv2NBPS( 9, BE, 1);
 yuv2NBPS( 9, LE, 0);
@@ -287,10 +290,10 @@ yuv2NBPS(10, LE, 0);
 yuv2NBPS(16, BE, 1);
 yuv2NBPS(16, LE, 0);
 
-static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
-                                 const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
-                                 enum PixelFormat dstFormat)
+static inline void yuv2yuvX16_c(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
+                                const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
+                                enum PixelFormat dstFormat)
 {
 #define conv16(bits) \
     if (isBE(dstFormat)) { \
@@ -316,10 +319,13 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr
 #undef conv16
 }
 
-static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter, const int16_t **chrUSrc,
-                               const int16_t **chrVSrc, int chrFilterSize,
-                               const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
+static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
+                              const int16_t **lumSrc, int lumFilterSize,
+                              const int16_t *chrFilter, const int16_t **chrUSrc,
+                              const int16_t **chrVSrc,
+                              int chrFilterSize, const int16_t **alpSrc,
+                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                              uint8_t *aDest, int dstW, int chrDstW)
 {
     //FIXME Optimize (just quickly written not optimized..)
     int i;
@@ -358,10 +364,12 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
 
 }
 
-static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                const int16_t *chrFilter, const int16_t **chrUSrc,
-                                const int16_t **chrVSrc, int chrFilterSize,
-                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
+static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
+                               const int16_t **lumSrc, int lumFilterSize,
+                               const int16_t *chrFilter, const int16_t **chrUSrc,
+                               const int16_t **chrVSrc,
+                               int chrFilterSize, uint8_t *dest, uint8_t *uDest,
+                               int dstW, int chrDstW, enum PixelFormat dstFormat)
 {
     //FIXME Optimize (just quickly written not optimized..)
     int i;
@@ -870,21 +878,21 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
         break;\
     }
 
-static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter,
-                                  const int16_t **lumSrc, int lumFilterSize,
-                                  const int16_t *chrFilter, const int16_t **chrUSrc,
-                                  const int16_t **chrVSrc, int chrFilterSize,
-                                  const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
+static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
+                          const int16_t **lumSrc, int lumFilterSize,
+                          const int16_t *chrFilter, const int16_t **chrUSrc,
+                          const int16_t **chrVSrc, int chrFilterSize,
+                          const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
 }
 
-static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter,
-                                    const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrUSrc,
-                                    const int16_t **chrVSrc, int chrFilterSize,
-                                    const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
+static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
+                                   const int16_t **lumSrc, int lumFilterSize,
+                                   const int16_t *chrFilter, const int16_t **chrUSrc,
+                                   const int16_t **chrVSrc, int chrFilterSize,
+                                   const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
     int step= c->dstFormatBpp/8;
@@ -976,8 +984,8 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
     }
 }
 
-static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, int width,
-                            uint32_t *unused)
+static void rgb48ToY_c(uint8_t *dst, const uint8_t *src, int width,
+                       uint32_t *unused)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -989,9 +997,9 @@ static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, int width,
     }
 }
 
-static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
-                             int width, uint32_t *unused)
+static void rgb48ToUV_c(uint8_t *dstU, uint8_t *dstV,
+                        const uint8_t *src1, const uint8_t *src2,
+                        int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -1005,9 +1013,9 @@ static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
     }
 }
 
-static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
-                                  const uint8_t *src1, const uint8_t *src2,
-                                  int width, uint32_t *unused)
+static void rgb48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -1021,8 +1029,8 @@ static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
     }
 }
 
-static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, int width,
-                            uint32_t *unused)
+static void bgr48ToY_c(uint8_t *dst, const uint8_t *src, int width,
+                       uint32_t *unused)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -1034,9 +1042,9 @@ static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, int width,
     }
 }
 
-static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
-                             int width, uint32_t *unused)
+static void bgr48ToUV_c(uint8_t *dstU, uint8_t *dstV,
+                        const uint8_t *src1, const uint8_t *src2,
+                        int width, uint32_t *unused)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -1049,9 +1057,9 @@ static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
     }
 }
 
-static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
-                                  const uint8_t *src1, const uint8_t *src2,
-                                  int width, uint32_t *unused)
+static void bgr48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int width, uint32_t *unused)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -1065,7 +1073,8 @@ static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
 }
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static inline void name(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)\
+static void name ## _c(uint8_t *dst, const uint8_t *src, \
+                       int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1086,7 +1095,7 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
 
-static inline void abgrToA(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1095,7 +1104,9 @@ static inline void abgrToA(uint8_t *dst, const uint8_t *src, int width, uint32_t
 }
 
 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
-static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
+static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
+                       const uint8_t *src, const uint8_t *dummy, \
+                       int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1107,7 +1118,9 @@ static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const
         dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
     }\
 }\
-static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
+static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
+                            const uint8_t *src, const uint8_t *dummy, \
+                            int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1134,7 +1147,7 @@ BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<
 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
 
-static inline void palToY(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
+static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1144,9 +1157,9 @@ static inline void palToY(uint8_t *dst, const uint8_t *src, int width, uint32_t
     }
 }
 
-static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
-                           const uint8_t *src1, const uint8_t *src2,
-                           int width, uint32_t *pal)
+static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
+                      const uint8_t *src1, const uint8_t *src2,
+                      int width, uint32_t *pal)
 {
     int i;
     assert(src1 == src2);
@@ -1158,7 +1171,8 @@ static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
     }
 }
 
-static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
+                          int width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
@@ -1168,7 +1182,8 @@ static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, int width, uint
     }
 }
 
-static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
+                          int width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
@@ -1178,36 +1193,11 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, int width, uint
     }
 }
 
-static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
-                              const int16_t **lumSrc, int lumFilterSize,
-                              const int16_t *chrFilter, const int16_t **chrUSrc,
-                              const int16_t **chrVSrc,
-                              int chrFilterSize, const int16_t **alpSrc,
-                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, int dstW, int chrDstW)
-{
-    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
-                chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
-}
-
-static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
-                               const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter, const int16_t **chrUSrc,
-                               const int16_t **chrVSrc,
-                               int chrFilterSize, uint8_t *dest, uint8_t *uDest,
-                               int dstW, int chrDstW, enum PixelFormat dstFormat)
-{
-    yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
-                 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                 dest, uDest, dstW, chrDstW, dstFormat);
-}
-
-static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
-                              const int16_t *chrUSrc, const int16_t *chrVSrc,
-                              const int16_t *alpSrc,
-                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, int dstW, int chrDstW)
+static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
+                       const int16_t *chrUSrc, const int16_t *chrVSrc,
+                       const int16_t *alpSrc,
+                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                       uint8_t *aDest, int dstW, int chrDstW)
 {
     int i;
     for (i=0; i<dstW; i++) {
@@ -1230,31 +1220,15 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
         }
 }
 
-
-/**
- * vertical scale YV12 to RGB
- */
-static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
-                                 const int16_t **lumSrc, int lumFilterSize,
-                                 const int16_t *chrFilter, const int16_t **chrUSrc,
-                                 const int16_t **chrVSrc,
-                                 int chrFilterSize, const int16_t **alpSrc,
-                                 uint8_t *dest, int dstW, int dstY)
-{
-        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                       chrFilter, chrUSrc, chrVSrc, chrFilterSize,
-                       alpSrc, dest, dstW, dstY);
-}
-
 /**
  * vertical bilinear scale YV12 to RGB
  */
-static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *buf1, const uint16_t *ubuf0,
-                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                 const uint16_t *vbuf1, const uint16_t *abuf0,
-                                 const uint16_t *abuf1, uint8_t *dest, int dstW,
-                                 int yalpha, int uvalpha, int y)
+static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
+                          const uint16_t *buf1, const uint16_t *ubuf0,
+                          const uint16_t *ubuf1, const uint16_t *vbuf0,
+                          const uint16_t *vbuf1, const uint16_t *abuf0,
+                          const uint16_t *abuf1, uint8_t *dest, int dstW,
+                          int yalpha, int uvalpha, int y)
 {
     int  yalpha1=4095- yalpha;
     int uvalpha1=4095-uvalpha;
@@ -1266,12 +1240,12 @@ static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
 /**
  * YV12 to RGB without scaling or interpolating
  */
-static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                 const uint16_t *abuf0, uint8_t *dest, int dstW,
-                                 int uvalpha, enum PixelFormat dstFormat,
-                                 int flags, int y)
+static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
+                          const uint16_t *ubuf0, const uint16_t *ubuf1,
+                          const uint16_t *vbuf0, const uint16_t *vbuf1,
+                          const uint16_t *abuf0, uint8_t *dest, int dstW,
+                          int uvalpha, enum PixelFormat dstFormat,
+                          int flags, int y)
 {
     const int yalpha1=0;
     int i;
@@ -1288,16 +1262,16 @@ static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
 
 //FIXME yuy2* can read up to 7 samples too much
 
-static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
-                             uint32_t *unused)
+static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
+                      uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++)
         dst[i]= src[2*i];
 }
 
-static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                              const uint8_t *src2, int width, uint32_t *unused)
+static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                       const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1307,12 +1281,10 @@ static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
     assert(src1 == src2);
 }
 
-static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                            const uint8_t *src2, int width, uint32_t *unused)
+static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                     const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
-    // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
-    // we need to skip each second pixel. Same for BEToUV.
     for (i=0; i<width; i++) {
         dstU[i]= src1[2*i + 1];
         dstV[i]= src2[2*i + 1];
@@ -1321,16 +1293,16 @@ static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
-                             uint32_t *unused)
+static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
+                      uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++)
         dst[i]= src[2*i+1];
 }
 
-static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                              const uint8_t *src2, int width, uint32_t *unused)
+static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                       const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1340,8 +1312,8 @@ static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
     assert(src1 == src2);
 }
 
-static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                            const uint8_t *src2, int width, uint32_t *unused)
+static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                     const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1350,8 +1322,8 @@ static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
     }
 }
 
-static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
-                              const uint8_t *src, int width)
+static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
+                                        const uint8_t *src, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -1360,23 +1332,23 @@ static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
     }
 }
 
-static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                              const uint8_t *src1, const uint8_t *src2,
-                              int width, uint32_t *unused)
+static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
+                       const uint8_t *src1, const uint8_t *src2,
+                       int width, uint32_t *unused)
 {
     nvXXtoUV_c(dstU, dstV, src1, width);
 }
 
-static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                              const uint8_t *src1, const uint8_t *src2,
-                              int width, uint32_t *unused)
+static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
+                       const uint8_t *src1, const uint8_t *src2,
+                       int width, uint32_t *unused)
 {
     nvXXtoUV_c(dstV, dstU, src1, width);
 }
 
 // FIXME Maybe dither instead.
 #define YUV_NBPS(depth, endianness, rfunc) \
-static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                           const uint8_t *_srcU, const uint8_t *_srcV, \
                                           int width, uint32_t *unused) \
 { \
@@ -1389,7 +1361,8 @@ static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
     } \
 } \
 \
-static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, int width, uint32_t *unused) \
+static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
+                                         int width, uint32_t *unused) \
 { \
     int i; \
     const uint16_t *srcY = (const uint16_t*)_srcY; \
@@ -1402,8 +1375,8 @@ YUV_NBPS( 9, BE, AV_RB16)
 YUV_NBPS(10, LE, AV_RL16)
 YUV_NBPS(10, BE, AV_RB16)
 
-static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
-                              int width, uint32_t *unused)
+static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
+                       int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1415,8 +1388,8 @@ static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
     }
 }
 
-static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                               const uint8_t *src2, int width, uint32_t *unused)
+static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                        const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1430,8 +1403,8 @@ static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1
     assert(src1 == src2);
 }
 
-static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                                    const uint8_t *src2, int width, uint32_t *unused)
+static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                             const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1445,8 +1418,8 @@ static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
-                              uint32_t *unused)
+static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
+                       uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1458,8 +1431,8 @@ static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
     }
 }
 
-static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                               const uint8_t *src2, int width, uint32_t *unused)
+static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                        const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -1473,8 +1446,8 @@ static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1
     }
 }
 
-static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                                    const uint8_t *src2, int width, uint32_t *unused)
+static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+                             const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -1490,10 +1463,10 @@ static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t
 
 
 // bilinear / bicubic scaling
-static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
-                            int srcW, int xInc,
-                            const int16_t *filter, const int16_t *filterPos,
-                            int filterSize)
+static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
+                     int srcW, int xInc,
+                     const int16_t *filter, const int16_t *filterPos,
+                     int filterSize)
 {
     int i;
     for (i=0; i<dstW; i++) {
@@ -1540,8 +1513,8 @@ static void lumRangeFromJpeg_c(uint16_t *dst, int width)
         dst[i] = (dst[i]*14071 + 33561947)>>14;
 }
 
-static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
-                                  const uint8_t *src, int srcW, int xInc)
+static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
+                           const uint8_t *src, int srcW, int xInc)
 {
     int i;
     unsigned int xpos=0;
@@ -1553,13 +1526,13 @@ static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
     }
 }
 
-      // *** horizontal scale Y line to temp buffer
-static inline void hyscale_c(SwsContext *c, uint16_t *dst, int dstWidth,
-                             const uint8_t *src, int srcW, int xInc,
-                             const int16_t *hLumFilter,
-                             const int16_t *hLumFilterPos, int hLumFilterSize,
-                             uint8_t *formatConvBuffer,
-                             uint32_t *pal, int isAlpha)
+// *** horizontal scale Y line to temp buffer
+static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
+                           const uint8_t *src, int srcW, int xInc,
+                           const int16_t *hLumFilter,
+                           const int16_t *hLumFilterPos, int hLumFilterSize,
+                           uint8_t *formatConvBuffer,
+                           uint32_t *pal, int isAlpha)
 {
     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
     void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
@@ -1581,9 +1554,9 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, int dstWidth,
         convertRange(dst, dstWidth);
 }
 
-static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                  int dstWidth, const uint8_t *src1,
-                                  const uint8_t *src2, int srcW, int xInc)
+static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
+                           int dstWidth, const uint8_t *src1,
+                           const uint8_t *src2, int srcW, int xInc)
 {
     int i;
     unsigned int xpos=0;
@@ -1596,11 +1569,11 @@ static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
     }
 }
 
-inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
-                             const uint8_t *src1, const uint8_t *src2,
-                             int srcW, int xInc, const int16_t *hChrFilter,
-                             const int16_t *hChrFilterPos, int hChrFilterSize,
-                             uint8_t *formatConvBuffer, uint32_t *pal)
+static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
+                           const uint8_t *src1, const uint8_t *src2,
+                           int srcW, int xInc, const int16_t *hChrFilter,
+                           const int16_t *hChrFilterPos, int hChrFilterSize,
+                           uint8_t *formatConvBuffer, uint32_t *pal)
 {
 
     src1 += c->chrSrcOffset;
@@ -1627,8 +1600,9 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int
 #define DEBUG_SWSCALE_BUFFERS 0
 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
 
-static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
-                     int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
+static int swScale(SwsContext *c, const uint8_t* src[],
+                   int srcStride[], int srcSliceY,
+                   int srcSliceH, uint8_t* dst[], int dstStride[])
 {
     /* load a few things into local vars to make the code more readable? and faster */
     const int srcW= c->srcW;
@@ -1762,15 +1736,15 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
             assert(lumBufIndex < 2*vLumBufSize);
             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
             assert(lastInLumBuf + 1 - srcSliceY >= 0);
-            hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
-                      hLumFilter, hLumFilterPos, hLumFilterSize,
-                      formatConvBuffer,
-                      pal, 0);
+            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
+                    hLumFilter, hLumFilterPos, hLumFilterSize,
+                    formatConvBuffer,
+                    pal, 0);
             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
-                hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
-                          lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
-                          formatConvBuffer,
-                          pal, 1);
+                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
+                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
+                        formatConvBuffer,
+                        pal, 1);
             lastInLumBuf++;
             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                                lumBufIndex,    lastInLumBuf);
@@ -1785,7 +1759,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
             //FIXME replace parameters through context struct (some at least)
 
             if (c->needs_hcscale)
-                hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
+                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
                           chrDstW, src1, src2, chrSrcW, chrXInc,
                           hChrFilter, hChrFilterPos, hChrFilterSize,
                           formatConvBuffer, pal);
@@ -1818,12 +1792,12 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
-                                  chrVSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
-                                  (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
-                                  dstFormat);
+                    yuv2yuvX16_c(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
+                                 chrVSrcPtr, vChrFilterSize,
+                                 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
+                                 (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                                 dstFormat);
                 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                     const int16_t *lumBuf = lumSrcPtr[0];
                     const int16_t *chrUBuf= chrUSrcPtr[0];
@@ -1844,11 +1818,11 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                     int chrAlpha= vChrFilter[2*dstY+1];
                     if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
-                                         chrVSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
+                        yuv2rgbX_c_full(c, //FIXME write a packed1_full function
+                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
+                                        chrVSrcPtr, vChrFilterSize,
+                                        alpSrcPtr, dest, dstW, dstY);
                     } else {
                         c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
                                        *chrVSrcPtr, *(chrVSrcPtr+1),
@@ -1863,10 +1837,10 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                     chrMmxFilter[2]=
                     chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
                     if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
+                        yuv2rgbX_c_full(c, //FIXME write a packed2_full function
+                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                        alpSrcPtr, dest, dstW, dstY);
                     } else {
                         c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
                                        *chrVSrcPtr, *(chrVSrcPtr+1),
@@ -1875,10 +1849,10 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
                     }
                 } else { //general RGB
                     if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbXinC_full(c,
-                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                         vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                         alpSrcPtr, dest, dstW, dstY);
+                        yuv2rgbX_c_full(c,
+                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                        alpSrcPtr, dest, dstW, dstY);
                     } else {
                         c->yuv2packedX(c,
                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
@@ -1895,38 +1869,40 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
             if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                yuv2nv12XinC(
-                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
+                yuv2nv12X_c(c, vLumFilter+dstY*vLumFilterSize,
+                            lumSrcPtr, vLumFilterSize,
+                            vChrFilter+chrDstY*vChrFilterSize,
+                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                            dest, uDest, dstW, chrDstW, dstFormat);
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16inC(
-                                  vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
-                                  dstFormat);
+                    yuv2yuvX16_c(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                                 dstFormat);
                 } else {
-                    yuv2yuvXinC(
-                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
+                    yuv2yuvX_c(c, vLumFilter+dstY*vLumFilterSize,
+                               lumSrcPtr, vLumFilterSize,
+                               vChrFilter+chrDstY*vChrFilterSize,
+                               chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                               alpSrcPtr, dest, uDest, vDest, aDest,
+                               dstW, chrDstW);
                 }
             } else {
                 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                 if(flags & SWS_FULL_CHR_H_INT) {
-                    yuv2rgbXinC_full(c,
-                                     vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                     vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                     alpSrcPtr, dest, dstW, dstY);
+                    yuv2rgbX_c_full(c,
+                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                    alpSrcPtr, dest, dstW, dstY);
                 } else {
-                    yuv2packedXinC(c,
-                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                   vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                   alpSrcPtr, dest, dstW, dstY);
+                    yuv2packedX_c(c,
+                                  vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                  vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                  alpSrcPtr, dest, dstW, dstY);
                 }
             }
         }
@@ -1980,7 +1956,7 @@ static void sws_init_swScale_c(SwsContext *c)
         case PIX_FMT_BGR8     :
         case PIX_FMT_PAL8     :
         case PIX_FMT_BGR4_BYTE:
-        case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
+        case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
         case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
         case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
         case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
@@ -1995,36 +1971,36 @@ static void sws_init_swScale_c(SwsContext *c)
     if (c->chrSrcHSubSample) {
         switch(srcFormat) {
         case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half_c; break;
         case PIX_FMT_BGR48BE:
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
-        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half;  break;
-        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half_c; break;
+        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half_c;  break;
+        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
-        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
-        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
-        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half;  break;
-        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
+        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
+        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
+        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half_c;  break;
+        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
         case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c; break;
-        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
-        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
+        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
+        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
         }
     } else {
         switch(srcFormat) {
         case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_c; break;
         case PIX_FMT_BGR48BE:
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
-        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV;  break;
-        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_c; break;
+        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_c;  break;
+        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
-        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
-        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
-        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV;  break;
-        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
+        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
+        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
+        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_c;  break;
+        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
         case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c; break;
-        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
-        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
+        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
+        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
         }
     }
 
@@ -2047,33 +2023,33 @@ static void sws_init_swScale_c(SwsContext *c)
     case PIX_FMT_YUV444P16LE:
     case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
     case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
-    case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY; break;
-    case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY; break;
+    case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY_c; break;
+    case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY_c; break;
     case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
-    case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY; break;
-    case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY; break;
+    case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY_c; break;
+    case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY_c; break;
     case PIX_FMT_RGB8     :
     case PIX_FMT_BGR8     :
     case PIX_FMT_PAL8     :
     case PIX_FMT_BGR4_BYTE:
-    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
-    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
-    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
-    case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY;  break;
-    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
-    case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY;  break;
-    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
+    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
+    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
+    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
+    case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
+    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
+    case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
+    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
     case PIX_FMT_RGB48BE:
-    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
+    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY_c; break;
     case PIX_FMT_BGR48BE:
-    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break;
+    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY_c; break;
     }
     if (c->alpPixBuf) {
         switch (srcFormat) {
         case PIX_FMT_RGB32  :
         case PIX_FMT_RGB32_1:
         case PIX_FMT_BGR32  :
-        case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
+        case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA_c; break;
         case PIX_FMT_Y400A  : c->alpToYV12 = yuy2ToY_c; break;
         }
     }
@@ -2118,7 +2094,7 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
     if (HAVE_ALTIVEC)
         ff_sws_init_swScale_altivec(c);
 
-    return swScale_c;
+    return swScale;
 }
 
 static void copyPlane(const uint8_t *src, int srcStride,

From 6af2801088aef6dd7aa688e88073f13bc7a8a4f4 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 2 Jun 2011 22:45:16 -0700
Subject: [PATCH 563/830] swscale: split swscale.c into unscaled and generic
 conversion routines.

This duplicates the function fillPlane().
---
 libswscale/Makefile           |   3 +-
 libswscale/swscale.c          | 839 --------------------------------
 libswscale/swscale_internal.h |   7 +
 libswscale/swscale_unscaled.c | 887 ++++++++++++++++++++++++++++++++++
 4 files changed, 896 insertions(+), 840 deletions(-)
 create mode 100644 libswscale/swscale_unscaled.c

diff --git a/libswscale/Makefile b/libswscale/Makefile
index 1d62b13af4..8bb06baae2 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -5,7 +5,8 @@ FFLIBS = avutil
 
 HEADERS = swscale.h
 
-OBJS = options.o rgb2rgb.o swscale.o utils.o yuv2rgb.o
+OBJS = options.o rgb2rgb.o swscale.o utils.o yuv2rgb.o \
+       swscale_unscaled.o
 
 OBJS-$(ARCH_BFIN)          +=  bfin/internal_bfin.o     \
                                bfin/swscale_bfin.o      \
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 56ceb13432..fd64b81019 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -69,14 +69,6 @@ untested special converters
 
 #define DITHER1XBPP
 
-#define isPacked(x)         (       \
-           (x)==PIX_FMT_PAL8        \
-        || (x)==PIX_FMT_YUYV422     \
-        || (x)==PIX_FMT_UYVY422     \
-        || (x)==PIX_FMT_Y400A       \
-        || isAnyRGB(x)              \
-    )
-
 #define RGB2YUV_SHIFT 15
 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
@@ -2096,834 +2088,3 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
 
     return swScale;
 }
-
-static void copyPlane(const uint8_t *src, int srcStride,
-                      int srcSliceY, int srcSliceH, int width,
-                      uint8_t *dst, int dstStride)
-{
-    dst += dstStride * srcSliceY;
-    if (dstStride == srcStride && srcStride > 0) {
-        memcpy(dst, src, srcSliceH * dstStride);
-    } else {
-        int i;
-        for (i=0; i<srcSliceH; i++) {
-            memcpy(dst, src, width);
-            src += srcStride;
-            dst += dstStride;
-        }
-    }
-}
-
-static int planarToNv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
-{
-    uint8_t *dst = dstParam[1] + dstStride[1]*srcSliceY/2;
-
-    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
-              dstParam[0], dstStride[0]);
-
-    if (c->dstFormat == PIX_FMT_NV12)
-        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[0]);
-    else
-        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int planarToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
-{
-    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
-
-    yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int planarToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
-{
-    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
-
-    yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int yuv422pToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
-{
-    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
-
-    yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int yuv422pToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
-{
-    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
-
-    yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
-{
-    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
-    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
-    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
-
-    yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
-
-    if (dstParam[3])
-        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-
-    return srcSliceH;
-}
-
-static int yuyvToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
-{
-    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
-    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
-    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
-
-    yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
-
-    return srcSliceH;
-}
-
-static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
-{
-    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
-    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
-    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
-
-    uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
-
-    if (dstParam[3])
-        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-
-    return srcSliceH;
-}
-
-static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
-{
-    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
-    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
-    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
-
-    uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
-
-    return srcSliceH;
-}
-
-static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
-{
-    int i;
-    for (i=0; i<num_pixels; i++)
-        ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | (src[(i<<1)+1] << 24);
-}
-
-static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
-{
-    int i;
-
-    for (i=0; i<num_pixels; i++)
-        ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | src[(i<<1)+1];
-}
-
-static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
-{
-    int i;
-
-    for (i=0; i<num_pixels; i++) {
-        //FIXME slow?
-        dst[0]= palette[src[i<<1]*4+0];
-        dst[1]= palette[src[i<<1]*4+1];
-        dst[2]= palette[src[i<<1]*4+2];
-        dst+= 3;
-    }
-}
-
-static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                           int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    const enum PixelFormat srcFormat= c->srcFormat;
-    const enum PixelFormat dstFormat= c->dstFormat;
-    void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels,
-                 const uint8_t *palette)=NULL;
-    int i;
-    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
-    const uint8_t *srcPtr= src[0];
-
-    if (srcFormat == PIX_FMT_Y400A) {
-        switch (dstFormat) {
-        case PIX_FMT_RGB32  : conv = gray8aToPacked32; break;
-        case PIX_FMT_BGR32  : conv = gray8aToPacked32; break;
-        case PIX_FMT_BGR32_1: conv = gray8aToPacked32_1; break;
-        case PIX_FMT_RGB32_1: conv = gray8aToPacked32_1; break;
-        case PIX_FMT_RGB24  : conv = gray8aToPacked24; break;
-        case PIX_FMT_BGR24  : conv = gray8aToPacked24; break;
-        }
-    } else if (usePal(srcFormat)) {
-        switch (dstFormat) {
-        case PIX_FMT_RGB32  : conv = sws_convertPalette8ToPacked32; break;
-        case PIX_FMT_BGR32  : conv = sws_convertPalette8ToPacked32; break;
-        case PIX_FMT_BGR32_1: conv = sws_convertPalette8ToPacked32; break;
-        case PIX_FMT_RGB32_1: conv = sws_convertPalette8ToPacked32; break;
-        case PIX_FMT_RGB24  : conv = sws_convertPalette8ToPacked24; break;
-        case PIX_FMT_BGR24  : conv = sws_convertPalette8ToPacked24; break;
-        }
-    }
-
-    if (!conv)
-        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-               sws_format_name(srcFormat), sws_format_name(dstFormat));
-    else {
-        for (i=0; i<srcSliceH; i++) {
-            conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
-            srcPtr+= srcStride[0];
-            dstPtr+= dstStride[0];
-        }
-    }
-
-    return srcSliceH;
-}
-
-#define isRGBA32(x) (            \
-           (x) == PIX_FMT_ARGB   \
-        || (x) == PIX_FMT_RGBA   \
-        || (x) == PIX_FMT_BGRA   \
-        || (x) == PIX_FMT_ABGR   \
-        )
-
-/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
-static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                           int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    const enum PixelFormat srcFormat= c->srcFormat;
-    const enum PixelFormat dstFormat= c->dstFormat;
-    const int srcBpp= (c->srcFormatBpp + 7) >> 3;
-    const int dstBpp= (c->dstFormatBpp + 7) >> 3;
-    const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
-    const int dstId= c->dstFormatBpp >> 2;
-    void (*conv)(const uint8_t *src, uint8_t *dst, int src_size)=NULL;
-
-#define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst)
-
-    if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) {
-        if (     CONV_IS(ABGR, RGBA)
-              || CONV_IS(ARGB, BGRA)
-              || CONV_IS(BGRA, ARGB)
-              || CONV_IS(RGBA, ABGR)) conv = shuffle_bytes_3210;
-        else if (CONV_IS(ABGR, ARGB)
-              || CONV_IS(ARGB, ABGR)) conv = shuffle_bytes_0321;
-        else if (CONV_IS(ABGR, BGRA)
-              || CONV_IS(ARGB, RGBA)) conv = shuffle_bytes_1230;
-        else if (CONV_IS(BGRA, RGBA)
-              || CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103;
-        else if (CONV_IS(BGRA, ABGR)
-              || CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012;
-    } else
-    /* BGR -> BGR */
-    if (  (isBGRinInt(srcFormat) && isBGRinInt(dstFormat))
-       || (isRGBinInt(srcFormat) && isRGBinInt(dstFormat))) {
-        switch(srcId | (dstId<<4)) {
-        case 0x34: conv= rgb16to15; break;
-        case 0x36: conv= rgb24to15; break;
-        case 0x38: conv= rgb32to15; break;
-        case 0x43: conv= rgb15to16; break;
-        case 0x46: conv= rgb24to16; break;
-        case 0x48: conv= rgb32to16; break;
-        case 0x63: conv= rgb15to24; break;
-        case 0x64: conv= rgb16to24; break;
-        case 0x68: conv= rgb32to24; break;
-        case 0x83: conv= rgb15to32; break;
-        case 0x84: conv= rgb16to32; break;
-        case 0x86: conv= rgb24to32; break;
-        }
-    } else if (  (isBGRinInt(srcFormat) && isRGBinInt(dstFormat))
-             || (isRGBinInt(srcFormat) && isBGRinInt(dstFormat))) {
-        switch(srcId | (dstId<<4)) {
-        case 0x33: conv= rgb15tobgr15; break;
-        case 0x34: conv= rgb16tobgr15; break;
-        case 0x36: conv= rgb24tobgr15; break;
-        case 0x38: conv= rgb32tobgr15; break;
-        case 0x43: conv= rgb15tobgr16; break;
-        case 0x44: conv= rgb16tobgr16; break;
-        case 0x46: conv= rgb24tobgr16; break;
-        case 0x48: conv= rgb32tobgr16; break;
-        case 0x63: conv= rgb15tobgr24; break;
-        case 0x64: conv= rgb16tobgr24; break;
-        case 0x66: conv= rgb24tobgr24; break;
-        case 0x68: conv= rgb32tobgr24; break;
-        case 0x83: conv= rgb15tobgr32; break;
-        case 0x84: conv= rgb16tobgr32; break;
-        case 0x86: conv= rgb24tobgr32; break;
-        }
-    }
-
-    if (!conv) {
-        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-               sws_format_name(srcFormat), sws_format_name(dstFormat));
-    } else {
-        const uint8_t *srcPtr= src[0];
-              uint8_t *dstPtr= dst[0];
-        if ((srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1) && !isRGBA32(dstFormat))
-            srcPtr += ALT32_CORR;
-
-        if ((dstFormat == PIX_FMT_RGB32_1 || dstFormat == PIX_FMT_BGR32_1) && !isRGBA32(srcFormat))
-            dstPtr += ALT32_CORR;
-
-        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
-            conv(srcPtr, dstPtr + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
-        else {
-            int i;
-            dstPtr += dstStride[0]*srcSliceY;
-
-            for (i=0; i<srcSliceH; i++) {
-                conv(srcPtr, dstPtr, c->srcW*srcBpp);
-                srcPtr+= srcStride[0];
-                dstPtr+= dstStride[0];
-            }
-        }
-    }
-    return srcSliceH;
-}
-
-static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                              int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    rgb24toyv12(
-        src[0],
-        dst[0]+ srcSliceY    *dstStride[0],
-        dst[1]+(srcSliceY>>1)*dstStride[1],
-        dst[2]+(srcSliceY>>1)*dstStride[2],
-        c->srcW, srcSliceH,
-        dstStride[0], dstStride[1], srcStride[0]);
-    if (dst[3])
-        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-    return srcSliceH;
-}
-
-static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                             int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
-              dst[0], dstStride[0]);
-
-    planar2x(src[1], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW,
-             srcSliceH >> 2, srcStride[1], dstStride[1]);
-    planar2x(src[2], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW,
-             srcSliceH >> 2, srcStride[2], dstStride[2]);
-    if (dst[3])
-        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-    return srcSliceH;
-}
-
-/* unscaled copy like stuff (assumes nearly identical formats) */
-static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                             int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
-        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
-    else {
-        int i;
-        const uint8_t *srcPtr= src[0];
-        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
-        int length=0;
-
-        /* universal length finder */
-        while(length+c->srcW <= FFABS(dstStride[0])
-           && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
-        assert(length!=0);
-
-        for (i=0; i<srcSliceH; i++) {
-            memcpy(dstPtr, srcPtr, length);
-            srcPtr+= srcStride[0];
-            dstPtr+= dstStride[0];
-        }
-    }
-    return srcSliceH;
-}
-
-static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
-                             int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    int plane, i, j;
-    for (plane=0; plane<4; plane++) {
-        int length= (plane==0 || plane==3) ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
-        int y=      (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
-        int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
-        const uint8_t *srcPtr= src[plane];
-        uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
-
-        if (!dst[plane]) continue;
-        // ignore palette for GRAY8
-        if (plane == 1 && !dst[2]) continue;
-        if (!src[plane] || (plane == 1 && !src[2])) {
-            if(is16BPS(c->dstFormat))
-                length*=2;
-            fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
-        } else {
-            if(is9_OR_10BPS(c->srcFormat)) {
-                const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1;
-                const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
-                const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
-
-                if (is16BPS(c->dstFormat)) {
-                    uint16_t *dstPtr2 = (uint16_t*)dstPtr;
-#define COPY9_OR_10TO16(rfunc, wfunc) \
-                    for (i = 0; i < height; i++) { \
-                        for (j = 0; j < length; j++) { \
-                            int srcpx = rfunc(&srcPtr2[j]); \
-                            wfunc(&dstPtr2[j], (srcpx<<(16-src_depth)) | (srcpx>>(2*src_depth-16))); \
-                        } \
-                        dstPtr2 += dstStride[plane]/2; \
-                        srcPtr2 += srcStride[plane]/2; \
-                    }
-                    if (isBE(c->dstFormat)) {
-                        if (isBE(c->srcFormat)) {
-                            COPY9_OR_10TO16(AV_RB16, AV_WB16);
-                        } else {
-                            COPY9_OR_10TO16(AV_RL16, AV_WB16);
-                        }
-                    } else {
-                        if (isBE(c->srcFormat)) {
-                            COPY9_OR_10TO16(AV_RB16, AV_WL16);
-                        } else {
-                            COPY9_OR_10TO16(AV_RL16, AV_WL16);
-                        }
-                    }
-                } else if (is9_OR_10BPS(c->dstFormat)) {
-                    uint16_t *dstPtr2 = (uint16_t*)dstPtr;
-#define COPY9_OR_10TO9_OR_10(loop) \
-                    for (i = 0; i < height; i++) { \
-                        for (j = 0; j < length; j++) { \
-                            loop; \
-                        } \
-                        dstPtr2 += dstStride[plane]/2; \
-                        srcPtr2 += srcStride[plane]/2; \
-                    }
-#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \
-                    if (dst_depth > src_depth) { \
-                        COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \
-                            wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \
-                    } else if (dst_depth < src_depth) { \
-                        COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]) >> 1)); \
-                    } else { \
-                        COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \
-                    }
-                    if (isBE(c->dstFormat)) {
-                        if (isBE(c->srcFormat)) {
-                            COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16);
-                        } else {
-                            COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16);
-                        }
-                    } else {
-                        if (isBE(c->srcFormat)) {
-                            COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16);
-                        } else {
-                            COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16);
-                        }
-                    }
-                } else {
-                    // FIXME Maybe dither instead.
-#define COPY9_OR_10TO8(rfunc) \
-                    for (i = 0; i < height; i++) { \
-                        for (j = 0; j < length; j++) { \
-                            dstPtr[j] = rfunc(&srcPtr2[j])>>(src_depth-8); \
-                        } \
-                        dstPtr  += dstStride[plane]; \
-                        srcPtr2 += srcStride[plane]/2; \
-                    }
-                    if (isBE(c->srcFormat)) {
-                        COPY9_OR_10TO8(AV_RB16);
-                    } else {
-                        COPY9_OR_10TO8(AV_RL16);
-                    }
-                }
-            } else if(is9_OR_10BPS(c->dstFormat)) {
-                const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
-                uint16_t *dstPtr2 = (uint16_t*)dstPtr;
-
-                if (is16BPS(c->srcFormat)) {
-                    const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
-#define COPY16TO9_OR_10(rfunc, wfunc) \
-                    for (i = 0; i < height; i++) { \
-                        for (j = 0; j < length; j++) { \
-                            wfunc(&dstPtr2[j], rfunc(&srcPtr2[j])>>(16-dst_depth)); \
-                        } \
-                        dstPtr2 += dstStride[plane]/2; \
-                        srcPtr2 += srcStride[plane]/2; \
-                    }
-                    if (isBE(c->dstFormat)) {
-                        if (isBE(c->srcFormat)) {
-                            COPY16TO9_OR_10(AV_RB16, AV_WB16);
-                        } else {
-                            COPY16TO9_OR_10(AV_RL16, AV_WB16);
-                        }
-                    } else {
-                        if (isBE(c->srcFormat)) {
-                            COPY16TO9_OR_10(AV_RB16, AV_WL16);
-                        } else {
-                            COPY16TO9_OR_10(AV_RL16, AV_WL16);
-                        }
-                    }
-                } else /* 8bit */ {
-#define COPY8TO9_OR_10(wfunc) \
-                    for (i = 0; i < height; i++) { \
-                        for (j = 0; j < length; j++) { \
-                            const int srcpx = srcPtr[j]; \
-                            wfunc(&dstPtr2[j], (srcpx<<(dst_depth-8)) | (srcpx >> (16-dst_depth))); \
-                        } \
-                        dstPtr2 += dstStride[plane]/2; \
-                        srcPtr  += srcStride[plane]; \
-                    }
-                    if (isBE(c->dstFormat)) {
-                        COPY8TO9_OR_10(AV_WB16);
-                    } else {
-                        COPY8TO9_OR_10(AV_WL16);
-                    }
-                }
-            } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
-                if (!isBE(c->srcFormat)) srcPtr++;
-                for (i=0; i<height; i++) {
-                    for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
-                    srcPtr+= srcStride[plane];
-                    dstPtr+= dstStride[plane];
-                }
-            } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
-                for (i=0; i<height; i++) {
-                    for (j=0; j<length; j++) {
-                        dstPtr[ j<<1   ] = srcPtr[j];
-                        dstPtr[(j<<1)+1] = srcPtr[j];
-                    }
-                    srcPtr+= srcStride[plane];
-                    dstPtr+= dstStride[plane];
-                }
-            } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
-                  && isBE(c->srcFormat) != isBE(c->dstFormat)) {
-
-                for (i=0; i<height; i++) {
-                    for (j=0; j<length; j++)
-                        ((uint16_t*)dstPtr)[j] = av_bswap16(((const uint16_t*)srcPtr)[j]);
-                    srcPtr+= srcStride[plane];
-                    dstPtr+= dstStride[plane];
-                }
-            } else if (dstStride[plane] == srcStride[plane] &&
-                       srcStride[plane] > 0 && srcStride[plane] == length) {
-                memcpy(dst[plane] + dstStride[plane]*y, src[plane],
-                       height*dstStride[plane]);
-            } else {
-                if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat))
-                    length*=2;
-                for (i=0; i<height; i++) {
-                    memcpy(dstPtr, srcPtr, length);
-                    srcPtr+= srcStride[plane];
-                    dstPtr+= dstStride[plane];
-                }
-            }
-        }
-    }
-    return srcSliceH;
-}
-
-void ff_get_unscaled_swscale(SwsContext *c)
-{
-    const enum PixelFormat srcFormat = c->srcFormat;
-    const enum PixelFormat dstFormat = c->dstFormat;
-    const int flags = c->flags;
-    const int dstH = c->dstH;
-    int needsDither;
-
-    needsDither= isAnyRGB(dstFormat)
-        &&  c->dstFormatBpp < 24
-        && (c->dstFormatBpp < c->srcFormatBpp || (!isAnyRGB(srcFormat)));
-
-    /* yv12_to_nv12 */
-    if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) {
-        c->swScale= planarToNv12Wrapper;
-    }
-    /* yuv2bgr */
-    if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && isAnyRGB(dstFormat)
-        && !(flags & SWS_ACCURATE_RND) && !(dstH&1)) {
-        c->swScale= ff_yuv2rgb_get_func_ptr(c);
-    }
-
-    if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT)) {
-        c->swScale= yvu9ToYv12Wrapper;
-    }
-
-    /* bgr24toYV12 */
-    if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND))
-        c->swScale= bgr24ToYv12Wrapper;
-
-    /* RGB/BGR -> RGB/BGR (no dither needed forms) */
-    if (   isAnyRGB(srcFormat)
-        && isAnyRGB(dstFormat)
-        && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
-        && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
-        && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
-        && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
-        && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
-        && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
-        && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
-        && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
-        && srcFormat != PIX_FMT_RGB48LE   && dstFormat != PIX_FMT_RGB48LE
-        && srcFormat != PIX_FMT_RGB48BE   && dstFormat != PIX_FMT_RGB48BE
-        && srcFormat != PIX_FMT_BGR48LE   && dstFormat != PIX_FMT_BGR48LE
-        && srcFormat != PIX_FMT_BGR48BE   && dstFormat != PIX_FMT_BGR48BE
-        && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
-        c->swScale= rgbToRgbWrapper;
-
-    if ((usePal(srcFormat) && (
-        dstFormat == PIX_FMT_RGB32   ||
-        dstFormat == PIX_FMT_RGB32_1 ||
-        dstFormat == PIX_FMT_RGB24   ||
-        dstFormat == PIX_FMT_BGR32   ||
-        dstFormat == PIX_FMT_BGR32_1 ||
-        dstFormat == PIX_FMT_BGR24)))
-        c->swScale= palToRgbWrapper;
-
-    if (srcFormat == PIX_FMT_YUV422P) {
-        if (dstFormat == PIX_FMT_YUYV422)
-            c->swScale= yuv422pToYuy2Wrapper;
-        else if (dstFormat == PIX_FMT_UYVY422)
-            c->swScale= yuv422pToUyvyWrapper;
-    }
-
-    /* LQ converters if -sws 0 or -sws 4*/
-    if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) {
-        /* yv12_to_yuy2 */
-        if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) {
-            if (dstFormat == PIX_FMT_YUYV422)
-                c->swScale= planarToYuy2Wrapper;
-            else if (dstFormat == PIX_FMT_UYVY422)
-                c->swScale= planarToUyvyWrapper;
-        }
-    }
-    if(srcFormat == PIX_FMT_YUYV422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
-        c->swScale= yuyvToYuv420Wrapper;
-    if(srcFormat == PIX_FMT_UYVY422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
-        c->swScale= uyvyToYuv420Wrapper;
-    if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P)
-        c->swScale= yuyvToYuv422Wrapper;
-    if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
-        c->swScale= uyvyToYuv422Wrapper;
-
-    /* simple copy */
-    if (  srcFormat == dstFormat
-        || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
-        || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P)
-        || (isPlanarYUV(srcFormat) && isGray(dstFormat))
-        || (isPlanarYUV(dstFormat) && isGray(srcFormat))
-        || (isGray(dstFormat) && isGray(srcFormat))
-        || (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat)
-            && c->chrDstHSubSample == c->chrSrcHSubSample
-            && c->chrDstVSubSample == c->chrSrcVSubSample
-            && dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21
-            && srcFormat != PIX_FMT_NV12 && srcFormat != PIX_FMT_NV21))
-    {
-        if (isPacked(c->srcFormat))
-            c->swScale= packedCopyWrapper;
-        else /* Planar YUV or gray */
-            c->swScale= planarCopyWrapper;
-    }
-
-    if (ARCH_BFIN)
-        ff_bfin_get_unscaled_swscale(c);
-    if (HAVE_ALTIVEC)
-        ff_swscale_get_unscaled_altivec(c);
-}
-
-static void reset_ptr(const uint8_t* src[], int format)
-{
-    if(!isALPHA(format))
-        src[3]=NULL;
-    if(!isPlanarYUV(format)) {
-        src[3]=src[2]=NULL;
-
-        if (!usePal(format))
-            src[1]= NULL;
-    }
-}
-
-static int check_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt,
-                                const int linesizes[4])
-{
-    const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
-    int i;
-
-    for (i = 0; i < 4; i++) {
-        int plane = desc->comp[i].plane;
-        if (!data[plane] || !linesizes[plane])
-            return 0;
-    }
-
-    return 1;
-}
-
-/**
- * swscale wrapper, so we don't need to export the SwsContext.
- * Assumes planar YUV to be in YUV order instead of YVU.
- */
-int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], int srcSliceY,
-              int srcSliceH, uint8_t* const dst[], const int dstStride[])
-{
-    int i;
-    const uint8_t* src2[4]= {src[0], src[1], src[2], src[3]};
-    uint8_t* dst2[4]= {dst[0], dst[1], dst[2], dst[3]};
-
-    // do not mess up sliceDir if we have a "trailing" 0-size slice
-    if (srcSliceH == 0)
-        return 0;
-
-    if (!check_image_pointers(src, c->srcFormat, srcStride)) {
-        av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
-        return 0;
-    }
-    if (!check_image_pointers(dst, c->dstFormat, dstStride)) {
-        av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
-        return 0;
-    }
-
-    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
-        av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
-        return 0;
-    }
-    if (c->sliceDir == 0) {
-        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
-    }
-
-    if (usePal(c->srcFormat)) {
-        for (i=0; i<256; i++) {
-            int p, r, g, b,y,u,v;
-            if(c->srcFormat == PIX_FMT_PAL8) {
-                p=((const uint32_t*)(src[1]))[i];
-                r= (p>>16)&0xFF;
-                g= (p>> 8)&0xFF;
-                b=  p     &0xFF;
-            } else if(c->srcFormat == PIX_FMT_RGB8) {
-                r= (i>>5    )*36;
-                g= ((i>>2)&7)*36;
-                b= (i&3     )*85;
-            } else if(c->srcFormat == PIX_FMT_BGR8) {
-                b= (i>>6    )*85;
-                g= ((i>>3)&7)*36;
-                r= (i&7     )*36;
-            } else if(c->srcFormat == PIX_FMT_RGB4_BYTE) {
-                r= (i>>3    )*255;
-                g= ((i>>1)&3)*85;
-                b= (i&1     )*255;
-            } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_Y400A) {
-                r = g = b = i;
-            } else {
-                assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
-                b= (i>>3    )*255;
-                g= ((i>>1)&3)*85;
-                r= (i&1     )*255;
-            }
-            y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-            u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-            v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-            c->pal_yuv[i]= y + (u<<8) + (v<<16);
-
-            switch(c->dstFormat) {
-            case PIX_FMT_BGR32:
-#if !HAVE_BIGENDIAN
-            case PIX_FMT_RGB24:
-#endif
-                c->pal_rgb[i]=  r + (g<<8) + (b<<16);
-                break;
-            case PIX_FMT_BGR32_1:
-#if HAVE_BIGENDIAN
-            case PIX_FMT_BGR24:
-#endif
-                c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
-                break;
-            case PIX_FMT_RGB32_1:
-#if HAVE_BIGENDIAN
-            case PIX_FMT_RGB24:
-#endif
-                c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
-                break;
-            case PIX_FMT_RGB32:
-#if !HAVE_BIGENDIAN
-            case PIX_FMT_BGR24:
-#endif
-            default:
-                c->pal_rgb[i]=  b + (g<<8) + (r<<16);
-            }
-        }
-    }
-
-    // copy strides, so they can safely be modified
-    if (c->sliceDir == 1) {
-        // slices go from top to bottom
-        int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2], srcStride[3]};
-        int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2], dstStride[3]};
-
-        reset_ptr(src2, c->srcFormat);
-        reset_ptr((const uint8_t**)dst2, c->dstFormat);
-
-        /* reset slice direction at end of frame */
-        if (srcSliceY + srcSliceH == c->srcH)
-            c->sliceDir = 0;
-
-        return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2);
-    } else {
-        // slices go from bottom to top => we flip the image internally
-        int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2], -srcStride[3]};
-        int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2], -dstStride[3]};
-
-        src2[0] += (srcSliceH-1)*srcStride[0];
-        if (!usePal(c->srcFormat))
-            src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
-        src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
-        src2[3] += (srcSliceH-1)*srcStride[3];
-        dst2[0] += ( c->dstH                      -1)*dstStride[0];
-        dst2[1] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1];
-        dst2[2] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2];
-        dst2[3] += ( c->dstH                      -1)*dstStride[3];
-
-        reset_ptr(src2, c->srcFormat);
-        reset_ptr((const uint8_t**)dst2, c->dstFormat);
-
-        /* reset slice direction at end of frame */
-        if (!srcSliceY)
-            c->sliceDir = 0;
-
-        return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
-    }
-}
-
-/* Convert the palette to the same packed 32-bit format as the palette */
-void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
-{
-    int i;
-
-    for (i=0; i<num_pixels; i++)
-        ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]];
-}
-
-/* Palette format: ABCD -> dst format: ABC */
-void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
-{
-    int i;
-
-    for (i=0; i<num_pixels; i++) {
-        //FIXME slow?
-        dst[0]= palette[src[i]*4+0];
-        dst[1]= palette[src[i]*4+1];
-        dst[2]= palette[src[i]*4+2];
-        dst+= 3;
-    }
-}
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 678d6d5797..6aaa843015 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -464,6 +464,13 @@ const char *sws_format_name(enum PixelFormat format);
         || (x)==PIX_FMT_Y400A       \
         || (x)==PIX_FMT_YUVA420P    \
     )
+#define isPacked(x)         (       \
+           (x)==PIX_FMT_PAL8        \
+        || (x)==PIX_FMT_YUYV422     \
+        || (x)==PIX_FMT_UYVY422     \
+        || (x)==PIX_FMT_Y400A       \
+        || isAnyRGB(x)              \
+    )
 #define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) || (x) == PIX_FMT_Y400A)
 
 extern const uint64_t ff_dither4[2];
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
new file mode 100644
index 0000000000..87cd655a46
--- /dev/null
+++ b/libswscale/swscale_unscaled.c
@@ -0,0 +1,887 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include <string.h>
+#include <math.h>
+#include <stdio.h>
+#include "config.h"
+#include <assert.h>
+#include "swscale.h"
+#include "swscale_internal.h"
+#include "rgb2rgb.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/cpu.h"
+#include "libavutil/avutil.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/bswap.h"
+#include "libavutil/pixdesc.h"
+
+#define RGB2YUV_SHIFT 15
+#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+
+/* Set `height` rows of `width` bytes each to `val`, starting at row `y` of `plane`; rows are `stride` bytes apart. */
+static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
+{
+    uint8_t *row = plane + stride*y;
+    int remaining;
+
+    for (remaining = height; remaining > 0; remaining--, row += stride)
+        memset(row, val, width);
+}
+
+/* Copy srcSliceH rows of a plane from src into dst, starting at destination row srcSliceY. */
+static void copyPlane(const uint8_t *src, int srcStride,
+                      int srcSliceY, int srcSliceH, int width,
+                      uint8_t *dst, int dstStride)
+{
+    uint8_t *out = dst + dstStride * srcSliceY;
+    int row;
+
+    /* equal, positive strides: copy the whole slice (srcSliceH * stride bytes) with one call */
+    if (srcStride > 0 && srcStride == dstStride) {
+        memcpy(out, src, srcSliceH * dstStride);
+        return;
+    }
+    for (row = 0; row < srcSliceH; row++)
+        memcpy(out + row * dstStride, src + row * srcStride, width);
+}
+
+/* Unscaled planar YUV -> semi-planar: copy plane 0, interleave source planes 1 and 2 (half width/height) into dst plane 1. Returns srcSliceH. */
+static int planarToNv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[])
+{
+    uint8_t *dst = dstParam[1] + dstStride[1]*srcSliceY/2;
+
+    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
+              dstParam[0], dstStride[0]);
+
+    /* dst lies in plane 1, so its rows advance by dstStride[1], not the plane 0 stride; non-NV12 output takes the planes swapped */
+    if (c->dstFormat == PIX_FMT_NV12)
+        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[1]);
+    else
+        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[1]);
+    return srcSliceH;
+}
+
+/* Unscaled wrapper around yv12toyuy2(): converts srcSliceH rows and writes them at row srcSliceY of dst plane 0. Returns srcSliceH. */
+static int planarToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[])
+{
+    uint8_t *const out = dstParam[0] + dstStride[0]*srcSliceY;
+
+    yv12toyuy2(src[0], src[1], src[2], out, c->srcW, srcSliceH,
+               srcStride[0], srcStride[1], dstStride[0]);
+    return srcSliceH;
+}
+
+/* Unscaled wrapper around yv12touyvy(): converts srcSliceH rows and writes them at row srcSliceY of dst plane 0. Returns srcSliceH. */
+static int planarToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[])
+{
+    uint8_t *const out = dstParam[0] + dstStride[0]*srcSliceY;
+
+    yv12touyvy(src[0], src[1], src[2], out, c->srcW, srcSliceH,
+               srcStride[0], srcStride[1], dstStride[0]);
+    return srcSliceH;
+}
+
+/* Unscaled wrapper around yuv422ptoyuy2(): converts srcSliceH rows and writes them at row srcSliceY of dst plane 0. Returns srcSliceH. */
+static int yuv422pToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[])
+{
+    uint8_t *const out = dstParam[0] + dstStride[0]*srcSliceY;
+
+    yuv422ptoyuy2(src[0], src[1], src[2], out, c->srcW, srcSliceH,
+                  srcStride[0], srcStride[1], dstStride[0]);
+    return srcSliceH;
+}
+
+/* Unscaled wrapper around yuv422ptouyvy(): converts srcSliceH rows and writes them at row srcSliceY of dst plane 0. Returns srcSliceH. */
+static int yuv422pToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[])
+{
+    uint8_t *const out = dstParam[0] + dstStride[0]*srcSliceY;
+
+    yuv422ptouyvy(src[0], src[1], src[2], out, c->srcW, srcSliceH,
+                  srcStride[0], srcStride[1], dstStride[0]);
+    return srcSliceH;
+}
+
+/* Unscaled wrapper around yuyvtoyuv420(): packed plane 0 -> three planes; a non-NULL dst plane 3 is filled with 255. Returns srcSliceH. */
+static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[])
+{
+    /* planes 1 and 2 are addressed at half the byte offset stride*srcSliceY */
+    uint8_t *yOut = dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *uOut = dstParam[1] + dstStride[1]*srcSliceY/2;
+    uint8_t *vOut = dstParam[2] + dstStride[2]*srcSliceY/2;
+
+    yuyvtoyuv420(yOut, uOut, vOut, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+    if (dstParam[3] != NULL)
+        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+    return srcSliceH;
+}
+
+/* Unscaled wrapper around yuyvtoyuv422(): packed plane 0 -> three planes, all addressed at row srcSliceY. Returns srcSliceH. */
+static int yuyvToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[])
+{
+    uint8_t *yOut = dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *uOut = dstParam[1] + dstStride[1]*srcSliceY;
+    uint8_t *vOut = dstParam[2] + dstStride[2]*srcSliceY;
+
+    yuyvtoyuv422(yOut, uOut, vOut, src[0], c->srcW, srcSliceH,
+                 dstStride[0], dstStride[1], srcStride[0]);
+    return srcSliceH;
+}
+
+/* Unscaled wrapper around uyvytoyuv420(): packed plane 0 -> three planes; a non-NULL dst plane 3 is filled with 255. Returns srcSliceH. */
+static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[])
+{
+    /* planes 1 and 2 are addressed at half the byte offset stride*srcSliceY */
+    uint8_t *yOut = dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *uOut = dstParam[1] + dstStride[1]*srcSliceY/2;
+    uint8_t *vOut = dstParam[2] + dstStride[2]*srcSliceY/2;
+
+    uyvytoyuv420(yOut, uOut, vOut, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+    if (dstParam[3] != NULL)
+        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+    return srcSliceH;
+}
+
+/* Unscaled wrapper around uyvytoyuv422(): packed plane 0 -> three planes, all addressed at row srcSliceY. Returns srcSliceH. */
+static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[])
+{
+    uint8_t *yOut = dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *uOut = dstParam[1] + dstStride[1]*srcSliceY;
+    uint8_t *vOut = dstParam[2] + dstStride[2]*srcSliceY;
+
+    uyvytoyuv422(yOut, uOut, vOut, src[0], c->srcW, srcSliceH,
+                 dstStride[0], dstStride[1], srcStride[0]);
+    return srcSliceH;
+}
+
+/* Byte pairs -> 32-bit pixels: the 32-bit palette entry selected by the first byte, with the second byte OR-ed into bits 24..31. */
+static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
+{
+    int i;
+    for (i=0; i<num_pixels; i++) /* widen before shifting: a uint8_t promoted to int overflows on << 24 for values >= 128 */
+        ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | ((uint32_t)src[(i<<1)+1] << 24);
+}
+
+/* Byte pairs -> 32-bit pixels: the 32-bit palette entry selected by the first byte, with the second byte OR-ed into bits 0..7. */
+static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
+{
+    const uint32_t *pal = (const uint32_t *)palette;
+    uint32_t *out = (uint32_t *) dst;
+    for (; num_pixels > 0; num_pixels--, src += 2)
+        *out++ = pal[src[0]] | src[1];
+}
+
+/* Byte pairs -> 3-byte pixels: bytes 0..2 of the 4-byte palette entry selected by the first byte; the second byte is not read. */
+static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
+{
+    int px;
+    for (px = 0; px < num_pixels; px++, dst += 3) {
+        //FIXME slow?
+        const uint8_t *entry = palette + src[px<<1]*4;
+        dst[0] = entry[0];
+        dst[1] = entry[1];
+        dst[2] = entry[2];
+    }
+}
+
+/* Unscaled palette-based source -> packed RGB: pick a per-row converter for dstFormat and run it on every row against c->pal_rgb. Returns srcSliceH. */
+static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    const enum PixelFormat srcFormat= c->srcFormat;
+    const enum PixelFormat dstFormat= c->dstFormat;
+    const int grayAlpha = srcFormat == PIX_FMT_Y400A;
+    void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels,
+                 const uint8_t *palette)=NULL;
+    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
+    const uint8_t *srcPtr= src[0];
+    int row;
+
+    /* Y400A uses the gray8a converters; any other source accepted by usePal() uses the generic palette lookups */
+    if (grayAlpha || usePal(srcFormat)) {
+        switch (dstFormat) {
+        case PIX_FMT_RGB32:
+        case PIX_FMT_BGR32:
+            conv = grayAlpha ? gray8aToPacked32 : sws_convertPalette8ToPacked32;
+            break;
+        case PIX_FMT_BGR32_1:
+        case PIX_FMT_RGB32_1:
+            conv = grayAlpha ? gray8aToPacked32_1 : sws_convertPalette8ToPacked32;
+            break;
+        case PIX_FMT_RGB24:
+        case PIX_FMT_BGR24:
+            conv = grayAlpha ? gray8aToPacked24 : sws_convertPalette8ToPacked24;
+            break;
+        }
+    }
+
+    if (!conv) {
+        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
+               sws_format_name(srcFormat), sws_format_name(dstFormat));
+        return srcSliceH;
+    }
+
+    for (row = 0; row < srcSliceH; row++) {
+        conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
+        srcPtr+= srcStride[0];
+        dstPtr+= dstStride[0];
+    }
+
+    return srcSliceH;
+}
+
+#define isRGBA32(x) (            \
+           (x) == PIX_FMT_ARGB   \
+        || (x) == PIX_FMT_RGBA   \
+        || (x) == PIX_FMT_BGRA   \
+        || (x) == PIX_FMT_ABGR   \
+        )
+
+/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32}: pick a converter for the src/dst format pair and run it on the slice; returns srcSliceH */
+static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
+                           int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    const enum PixelFormat srcFormat= c->srcFormat;
+    const enum PixelFormat dstFormat= c->dstFormat;
+    const int srcBpp= (c->srcFormatBpp + 7) >> 3;
+    const int dstBpp= (c->dstFormatBpp + 7) >> 3;
+    const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
+    const int dstId= c->dstFormatBpp >> 2;
+    void (*conv)(const uint8_t *src, uint8_t *dst, int src_size)=NULL;
+    /* CONV_IS(src, dst): true when the conversion is exactly PIX_FMT_<src> -> PIX_FMT_<dst> */
+#define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst)
+
+    if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) {
+        if (     CONV_IS(ABGR, RGBA)
+              || CONV_IS(ARGB, BGRA)
+              || CONV_IS(BGRA, ARGB)
+              || CONV_IS(RGBA, ABGR)) conv = shuffle_bytes_3210;
+        else if (CONV_IS(ABGR, ARGB)
+              || CONV_IS(ARGB, ABGR)) conv = shuffle_bytes_0321;
+        else if (CONV_IS(ABGR, BGRA)
+              || CONV_IS(ARGB, RGBA)) conv = shuffle_bytes_1230;
+        else if (CONV_IS(BGRA, RGBA)
+              || CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103;
+        else if (CONV_IS(BGRA, ABGR)
+              || CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012;
+    } else
+    /* BGR -> BGR */
+    if (  (isBGRinInt(srcFormat) && isBGRinInt(dstFormat))
+       || (isRGBinInt(srcFormat) && isRGBinInt(dstFormat))) {
+        switch(srcId | (dstId<<4)) { /* low nibble: source depth id, high nibble: destination depth id */
+        case 0x34: conv= rgb16to15; break;
+        case 0x36: conv= rgb24to15; break;
+        case 0x38: conv= rgb32to15; break;
+        case 0x43: conv= rgb15to16; break;
+        case 0x46: conv= rgb24to16; break;
+        case 0x48: conv= rgb32to16; break;
+        case 0x63: conv= rgb15to24; break;
+        case 0x64: conv= rgb16to24; break;
+        case 0x68: conv= rgb32to24; break;
+        case 0x83: conv= rgb15to32; break;
+        case 0x84: conv= rgb16to32; break;
+        case 0x86: conv= rgb24to32; break;
+        }
+    } else if (  (isBGRinInt(srcFormat) && isRGBinInt(dstFormat))
+             || (isRGBinInt(srcFormat) && isBGRinInt(dstFormat))) {
+        switch(srcId | (dstId<<4)) {
+        case 0x33: conv= rgb15tobgr15; break;
+        case 0x34: conv= rgb16tobgr15; break;
+        case 0x36: conv= rgb24tobgr15; break;
+        case 0x38: conv= rgb32tobgr15; break;
+        case 0x43: conv= rgb15tobgr16; break;
+        case 0x44: conv= rgb16tobgr16; break;
+        case 0x46: conv= rgb24tobgr16; break;
+        case 0x48: conv= rgb32tobgr16; break;
+        case 0x63: conv= rgb15tobgr24; break;
+        case 0x64: conv= rgb16tobgr24; break;
+        case 0x66: conv= rgb24tobgr24; break;
+        case 0x68: conv= rgb32tobgr24; break;
+        case 0x83: conv= rgb15tobgr32; break;
+        case 0x84: conv= rgb16tobgr32; break;
+        case 0x86: conv= rgb24tobgr32; break;
+        }
+    }
+
+    if (!conv) {
+        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
+               sws_format_name(srcFormat), sws_format_name(dstFormat));
+    } else {
+        const uint8_t *srcPtr= src[0];
+              uint8_t *dstPtr= dst[0];
+        if ((srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1) && !isRGBA32(dstFormat))
+            srcPtr += ALT32_CORR;
+
+        if ((dstFormat == PIX_FMT_RGB32_1 || dstFormat == PIX_FMT_BGR32_1) && !isRGBA32(srcFormat))
+            dstPtr += ALT32_CORR;
+        /* strides positive and in the same ratio as the pixel sizes: one call covers srcSliceH*srcStride[0] bytes; otherwise convert row by row */
+        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
+            conv(srcPtr, dstPtr + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
+        else {
+            int i;
+            dstPtr += dstStride[0]*srcSliceY;
+
+            for (i=0; i<srcSliceH; i++) {
+                conv(srcPtr, dstPtr, c->srcW*srcBpp);
+                srcPtr+= srcStride[0];
+                dstPtr+= dstStride[0];
+            }
+        }
+    }
+    return srcSliceH;
+}
+
+static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
+                              int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    rgb24toyv12(
+        src[0],
+        dst[0]+ srcSliceY    *dstStride[0],
+        dst[1]+(srcSliceY>>1)*dstStride[1],
+        dst[2]+(srcSliceY>>1)*dstStride[2],
+        c->srcW, srcSliceH,
+        dstStride[0], dstStride[1], srcStride[0]);
+    if (dst[3])
+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+    return srcSliceH;
+}
+
+static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
+                             int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
+              dst[0], dstStride[0]);
+
+    planar2x(src[1], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW,
+             srcSliceH >> 2, srcStride[1], dstStride[1]);
+    planar2x(src[2], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW,
+             srcSliceH >> 2, srcStride[2], dstStride[2]);
+    if (dst[3])
+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+    return srcSliceH;
+}
+
+/* unscaled copy like stuff (assumes nearly identical formats) */
+static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
+                             int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
+        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
+    else {
+        int i;
+        const uint8_t *srcPtr= src[0];
+        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
+        int length=0;
+
+        /* universal length finder */
+        while(length+c->srcW <= FFABS(dstStride[0])
+           && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
+        assert(length!=0);
+
+        for (i=0; i<srcSliceH; i++) {
+            memcpy(dstPtr, srcPtr, length);
+            srcPtr+= srcStride[0];
+            dstPtr+= dstStride[0];
+        }
+    }
+    return srcSliceH;
+}
+
+static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
+                             int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    int plane, i, j;
+    for (plane=0; plane<4; plane++) {
+        int length= (plane==0 || plane==3) ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
+        int y=      (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
+        int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
+        const uint8_t *srcPtr= src[plane];
+        uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
+
+        if (!dst[plane]) continue;
+        // ignore palette for GRAY8
+        if (plane == 1 && !dst[2]) continue;
+        if (!src[plane] || (plane == 1 && !src[2])) {
+            if(is16BPS(c->dstFormat))
+                length*=2;
+            fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
+        } else {
+            if(is9_OR_10BPS(c->srcFormat)) {
+                const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1;
+                const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
+                const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
+
+                if (is16BPS(c->dstFormat)) {
+                    uint16_t *dstPtr2 = (uint16_t*)dstPtr;
+#define COPY9_OR_10TO16(rfunc, wfunc) \
+                    for (i = 0; i < height; i++) { \
+                        for (j = 0; j < length; j++) { \
+                            int srcpx = rfunc(&srcPtr2[j]); \
+                            wfunc(&dstPtr2[j], (srcpx<<(16-src_depth)) | (srcpx>>(2*src_depth-16))); \
+                        } \
+                        dstPtr2 += dstStride[plane]/2; \
+                        srcPtr2 += srcStride[plane]/2; \
+                    }
+                    if (isBE(c->dstFormat)) {
+                        if (isBE(c->srcFormat)) {
+                            COPY9_OR_10TO16(AV_RB16, AV_WB16);
+                        } else {
+                            COPY9_OR_10TO16(AV_RL16, AV_WB16);
+                        }
+                    } else {
+                        if (isBE(c->srcFormat)) {
+                            COPY9_OR_10TO16(AV_RB16, AV_WL16);
+                        } else {
+                            COPY9_OR_10TO16(AV_RL16, AV_WL16);
+                        }
+                    }
+                } else if (is9_OR_10BPS(c->dstFormat)) {
+                    uint16_t *dstPtr2 = (uint16_t*)dstPtr;
+#define COPY9_OR_10TO9_OR_10(loop) \
+                    for (i = 0; i < height; i++) { \
+                        for (j = 0; j < length; j++) { \
+                            loop; \
+                        } \
+                        dstPtr2 += dstStride[plane]/2; \
+                        srcPtr2 += srcStride[plane]/2; \
+                    }
+#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \
+                    if (dst_depth > src_depth) { \
+                        COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \
+                            wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \
+                    } else if (dst_depth < src_depth) { \
+                        COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]) >> 1)); \
+                    } else { \
+                        COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \
+                    }
+                    if (isBE(c->dstFormat)) {
+                        if (isBE(c->srcFormat)) {
+                            COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16);
+                        } else {
+                            COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16);
+                        }
+                    } else {
+                        if (isBE(c->srcFormat)) {
+                            COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16);
+                        } else {
+                            COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16);
+                        }
+                    }
+                } else {
+                    // FIXME Maybe dither instead.
+#define COPY9_OR_10TO8(rfunc) \
+                    for (i = 0; i < height; i++) { \
+                        for (j = 0; j < length; j++) { \
+                            dstPtr[j] = rfunc(&srcPtr2[j])>>(src_depth-8); \
+                        } \
+                        dstPtr  += dstStride[plane]; \
+                        srcPtr2 += srcStride[plane]/2; \
+                    }
+                    if (isBE(c->srcFormat)) {
+                        COPY9_OR_10TO8(AV_RB16);
+                    } else {
+                        COPY9_OR_10TO8(AV_RL16);
+                    }
+                }
+            } else if(is9_OR_10BPS(c->dstFormat)) {
+                const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
+                uint16_t *dstPtr2 = (uint16_t*)dstPtr;
+
+                if (is16BPS(c->srcFormat)) {
+                    const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
+#define COPY16TO9_OR_10(rfunc, wfunc) \
+                    for (i = 0; i < height; i++) { \
+                        for (j = 0; j < length; j++) { \
+                            wfunc(&dstPtr2[j], rfunc(&srcPtr2[j])>>(16-dst_depth)); \
+                        } \
+                        dstPtr2 += dstStride[plane]/2; \
+                        srcPtr2 += srcStride[plane]/2; \
+                    }
+                    if (isBE(c->dstFormat)) {
+                        if (isBE(c->srcFormat)) {
+                            COPY16TO9_OR_10(AV_RB16, AV_WB16);
+                        } else {
+                            COPY16TO9_OR_10(AV_RL16, AV_WB16);
+                        }
+                    } else {
+                        if (isBE(c->srcFormat)) {
+                            COPY16TO9_OR_10(AV_RB16, AV_WL16);
+                        } else {
+                            COPY16TO9_OR_10(AV_RL16, AV_WL16);
+                        }
+                    }
+                } else /* 8bit */ {
+#define COPY8TO9_OR_10(wfunc) \
+                    for (i = 0; i < height; i++) { \
+                        for (j = 0; j < length; j++) { \
+                            const int srcpx = srcPtr[j]; \
+                            wfunc(&dstPtr2[j], (srcpx<<(dst_depth-8)) | (srcpx >> (16-dst_depth))); \
+                        } \
+                        dstPtr2 += dstStride[plane]/2; \
+                        srcPtr  += srcStride[plane]; \
+                    }
+                    if (isBE(c->dstFormat)) {
+                        COPY8TO9_OR_10(AV_WB16);
+                    } else {
+                        COPY8TO9_OR_10(AV_WL16);
+                    }
+                }
+            } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
+                if (!isBE(c->srcFormat)) srcPtr++;
+                for (i=0; i<height; i++) {
+                    for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
+                    srcPtr+= srcStride[plane];
+                    dstPtr+= dstStride[plane];
+                }
+            } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
+                for (i=0; i<height; i++) {
+                    for (j=0; j<length; j++) {
+                        dstPtr[ j<<1   ] = srcPtr[j];
+                        dstPtr[(j<<1)+1] = srcPtr[j];
+                    }
+                    srcPtr+= srcStride[plane];
+                    dstPtr+= dstStride[plane];
+                }
+            } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
+                  && isBE(c->srcFormat) != isBE(c->dstFormat)) {
+
+                for (i=0; i<height; i++) {
+                    for (j=0; j<length; j++)
+                        ((uint16_t*)dstPtr)[j] = av_bswap16(((const uint16_t*)srcPtr)[j]);
+                    srcPtr+= srcStride[plane];
+                    dstPtr+= dstStride[plane];
+                }
+            } else if (dstStride[plane] == srcStride[plane] &&
+                       srcStride[plane] > 0 && srcStride[plane] == length) {
+                memcpy(dst[plane] + dstStride[plane]*y, src[plane],
+                       height*dstStride[plane]);
+            } else {
+                if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat))
+                    length*=2;
+                for (i=0; i<height; i++) {
+                    memcpy(dstPtr, srcPtr, length);
+                    srcPtr+= srcStride[plane];
+                    dstPtr+= dstStride[plane];
+                }
+            }
+        }
+    }
+    return srcSliceH;
+}
+
+void ff_get_unscaled_swscale(SwsContext *c)
+{
+    const enum PixelFormat srcFormat = c->srcFormat;
+    const enum PixelFormat dstFormat = c->dstFormat;
+    const int flags = c->flags;
+    const int dstH = c->dstH;
+    int needsDither;
+
+    needsDither= isAnyRGB(dstFormat)
+        &&  c->dstFormatBpp < 24
+        && (c->dstFormatBpp < c->srcFormatBpp || (!isAnyRGB(srcFormat)));
+
+    /* yv12_to_nv12 */
+    if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) {
+        c->swScale= planarToNv12Wrapper;
+    }
+    /* yuv2bgr */
+    if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && isAnyRGB(dstFormat)
+        && !(flags & SWS_ACCURATE_RND) && !(dstH&1)) {
+        c->swScale= ff_yuv2rgb_get_func_ptr(c);
+    }
+
+    if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT)) {
+        c->swScale= yvu9ToYv12Wrapper;
+    }
+
+    /* bgr24toYV12 */
+    if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND))
+        c->swScale= bgr24ToYv12Wrapper;
+
+    /* RGB/BGR -> RGB/BGR (no dither needed forms) */
+    if (   isAnyRGB(srcFormat)
+        && isAnyRGB(dstFormat)
+        && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
+        && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
+        && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
+        && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
+        && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
+        && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
+        && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
+        && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
+        && srcFormat != PIX_FMT_RGB48LE   && dstFormat != PIX_FMT_RGB48LE
+        && srcFormat != PIX_FMT_RGB48BE   && dstFormat != PIX_FMT_RGB48BE
+        && srcFormat != PIX_FMT_BGR48LE   && dstFormat != PIX_FMT_BGR48LE
+        && srcFormat != PIX_FMT_BGR48BE   && dstFormat != PIX_FMT_BGR48BE
+        && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
+        c->swScale= rgbToRgbWrapper;
+
+    if ((usePal(srcFormat) && (
+        dstFormat == PIX_FMT_RGB32   ||
+        dstFormat == PIX_FMT_RGB32_1 ||
+        dstFormat == PIX_FMT_RGB24   ||
+        dstFormat == PIX_FMT_BGR32   ||
+        dstFormat == PIX_FMT_BGR32_1 ||
+        dstFormat == PIX_FMT_BGR24)))
+        c->swScale= palToRgbWrapper;
+
+    if (srcFormat == PIX_FMT_YUV422P) {
+        if (dstFormat == PIX_FMT_YUYV422)
+            c->swScale= yuv422pToYuy2Wrapper;
+        else if (dstFormat == PIX_FMT_UYVY422)
+            c->swScale= yuv422pToUyvyWrapper;
+    }
+
+    /* LQ converters if -sws 0 or -sws 4*/
+    if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) {
+        /* yv12_to_yuy2 */
+        if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) {
+            if (dstFormat == PIX_FMT_YUYV422)
+                c->swScale= planarToYuy2Wrapper;
+            else if (dstFormat == PIX_FMT_UYVY422)
+                c->swScale= planarToUyvyWrapper;
+        }
+    }
+    if(srcFormat == PIX_FMT_YUYV422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
+        c->swScale= yuyvToYuv420Wrapper;
+    if(srcFormat == PIX_FMT_UYVY422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
+        c->swScale= uyvyToYuv420Wrapper;
+    if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P)
+        c->swScale= yuyvToYuv422Wrapper;
+    if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
+        c->swScale= uyvyToYuv422Wrapper;
+
+    /* simple copy */
+    if (  srcFormat == dstFormat
+        || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
+        || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P)
+        || (isPlanarYUV(srcFormat) && isGray(dstFormat))
+        || (isPlanarYUV(dstFormat) && isGray(srcFormat))
+        || (isGray(dstFormat) && isGray(srcFormat))
+        || (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat)
+            && c->chrDstHSubSample == c->chrSrcHSubSample
+            && c->chrDstVSubSample == c->chrSrcVSubSample
+            && dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21
+            && srcFormat != PIX_FMT_NV12 && srcFormat != PIX_FMT_NV21))
+    {
+        if (isPacked(c->srcFormat))
+            c->swScale= packedCopyWrapper;
+        else /* Planar YUV or gray */
+            c->swScale= planarCopyWrapper;
+    }
+
+    if (ARCH_BFIN)
+        ff_bfin_get_unscaled_swscale(c);
+    if (HAVE_ALTIVEC)
+        ff_swscale_get_unscaled_altivec(c);
+}
+
+static void reset_ptr(const uint8_t* src[], int format)
+{
+    if(!isALPHA(format))
+        src[3]=NULL;
+    if(!isPlanarYUV(format)) {
+        src[3]=src[2]=NULL;
+
+        if (!usePal(format))
+            src[1]= NULL;
+    }
+}
+
+static int check_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt,
+                                const int linesizes[4])
+{
+    const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        int plane = desc->comp[i].plane;
+        if (!data[plane] || !linesizes[plane])
+            return 0;
+    }
+
+    return 1;
+}
+
+/**
+ * swscale wrapper, so we don't need to export the SwsContext.
+ * Assumes planar YUV to be in YUV order instead of YVU.
+ */
+int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], int srcSliceY,
+              int srcSliceH, uint8_t* const dst[], const int dstStride[])
+{
+    int i;
+    const uint8_t* src2[4]= {src[0], src[1], src[2], src[3]};
+    uint8_t* dst2[4]= {dst[0], dst[1], dst[2], dst[3]};
+
+    // do not mess up sliceDir if we have a "trailing" 0-size slice
+    if (srcSliceH == 0)
+        return 0;
+
+    if (!check_image_pointers(src, c->srcFormat, srcStride)) {
+        av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
+        return 0;
+    }
+    if (!check_image_pointers(dst, c->dstFormat, dstStride)) {
+        av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
+        return 0;
+    }
+
+    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
+        av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
+        return 0;
+    }
+    if (c->sliceDir == 0) {
+        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
+    }
+
+    if (usePal(c->srcFormat)) {
+        for (i=0; i<256; i++) {
+            int p, r, g, b,y,u,v;
+            if(c->srcFormat == PIX_FMT_PAL8) {
+                p=((const uint32_t*)(src[1]))[i];
+                r= (p>>16)&0xFF;
+                g= (p>> 8)&0xFF;
+                b=  p     &0xFF;
+            } else if(c->srcFormat == PIX_FMT_RGB8) {
+                r= (i>>5    )*36;
+                g= ((i>>2)&7)*36;
+                b= (i&3     )*85;
+            } else if(c->srcFormat == PIX_FMT_BGR8) {
+                b= (i>>6    )*85;
+                g= ((i>>3)&7)*36;
+                r= (i&7     )*36;
+            } else if(c->srcFormat == PIX_FMT_RGB4_BYTE) {
+                r= (i>>3    )*255;
+                g= ((i>>1)&3)*85;
+                b= (i&1     )*255;
+            } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_Y400A) {
+                r = g = b = i;
+            } else {
+                assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
+                b= (i>>3    )*255;
+                g= ((i>>1)&3)*85;
+                r= (i&1     )*255;
+            }
+            y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+            u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+            v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+            c->pal_yuv[i]= y + (u<<8) + (v<<16);
+
+            switch(c->dstFormat) {
+            case PIX_FMT_BGR32:
+#if !HAVE_BIGENDIAN
+            case PIX_FMT_RGB24:
+#endif
+                c->pal_rgb[i]=  r + (g<<8) + (b<<16);
+                break;
+            case PIX_FMT_BGR32_1:
+#if HAVE_BIGENDIAN
+            case PIX_FMT_BGR24:
+#endif
+                c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
+                break;
+            case PIX_FMT_RGB32_1:
+#if HAVE_BIGENDIAN
+            case PIX_FMT_RGB24:
+#endif
+                c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
+                break;
+            case PIX_FMT_RGB32:
+#if !HAVE_BIGENDIAN
+            case PIX_FMT_BGR24:
+#endif
+            default:
+                c->pal_rgb[i]=  b + (g<<8) + (r<<16);
+            }
+        }
+    }
+
+    // copy strides, so they can safely be modified
+    if (c->sliceDir == 1) {
+        // slices go from top to bottom
+        int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2], srcStride[3]};
+        int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2], dstStride[3]};
+
+        reset_ptr(src2, c->srcFormat);
+        reset_ptr((const uint8_t**)dst2, c->dstFormat);
+
+        /* reset slice direction at end of frame */
+        if (srcSliceY + srcSliceH == c->srcH)
+            c->sliceDir = 0;
+
+        return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2);
+    } else {
+        // slices go from bottom to top => we flip the image internally
+        int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2], -srcStride[3]};
+        int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2], -dstStride[3]};
+
+        src2[0] += (srcSliceH-1)*srcStride[0];
+        if (!usePal(c->srcFormat))
+            src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
+        src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
+        src2[3] += (srcSliceH-1)*srcStride[3];
+        dst2[0] += ( c->dstH                      -1)*dstStride[0];
+        dst2[1] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1];
+        dst2[2] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2];
+        dst2[3] += ( c->dstH                      -1)*dstStride[3];
+
+        reset_ptr(src2, c->srcFormat);
+        reset_ptr((const uint8_t**)dst2, c->dstFormat);
+
+        /* reset slice direction at end of frame */
+        if (!srcSliceY)
+            c->sliceDir = 0;
+
+        return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
+    }
+}
+
+/* Convert 8-bit palette indices to packed 32-bit pixels in the same format as the palette */
+void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
+{
+    int i;
+
+    for (i=0; i<num_pixels; i++)
+        ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]];
+}
+
+/* Palette format: ABCD -> dst format: ABC */
+void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
+{
+    int i;
+
+    for (i=0; i<num_pixels; i++) {
+        //FIXME slow?
+        dst[0]= palette[src[i]*4+0];
+        dst[1]= palette[src[i]*4+1];
+        dst[2]= palette[src[i]*4+2];
+        dst+= 3;
+    }
+}

From f35761bca20a96c74c3083459643046683a95e42 Mon Sep 17 00:00:00 2001
From: JULIAN GARDNER <joolzg@btinternet.com>
Date: Fri, 3 Jun 2011 16:08:16 +0200
Subject: [PATCH 564/830] udp: fix indentation

---
 libavformat/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/udp.c b/libavformat/udp.c
index 8c8c195fc4..7a01d29758 100644
--- a/libavformat/udp.c
+++ b/libavformat/udp.c
@@ -518,7 +518,7 @@ static int udp_open(URLContext *h, const char *uri, int flags)
  fail:
     if (udp_fd >= 0)
         closesocket(udp_fd);
-        av_fifo_free(s->fifo);
+    av_fifo_free(s->fifo);
     av_free(s);
     return AVERROR(EIO);
 }

From 58149303b3e502199bd663c2c107b7a4771bc314 Mon Sep 17 00:00:00 2001
From: Rukhsana Ruby <rukhsana.afroz@gmail.com>
Date: Fri, 3 Jun 2011 18:47:30 +0200
Subject: [PATCH 565/830] makefile: fix j2k encoder dependencies

---
 libavcodec/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 76cb8cd161..fa05d03758 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -194,7 +194,7 @@ OBJS-$(CONFIG_INDEO5_DECODER)          += indeo5.o ivi_common.o ivi_dsp.o
 OBJS-$(CONFIG_INTERPLAY_DPCM_DECODER)  += dpcm.o
 OBJS-$(CONFIG_INTERPLAY_VIDEO_DECODER) += interplayvideo.o
 OBJS-$(CONFIG_JPEG2000_DECODER)        += j2kdec.o mqcdec.o mqc.o j2k.o j2k_dwt.o
-#OBJS-$(CONFIG_JPEG2000_ENCODER)        += j2kenc.o mqcenc.o mqc.o j2k.o dwt.o
+#OBJS-$(CONFIG_JPEG2000_ENCODER)        += j2kenc.o mqcenc.o mqc.o j2k.o j2k_dwt.o
 OBJS-$(CONFIG_JPEGLS_DECODER)          += jpeglsdec.o jpegls.o \
                                           mjpegdec.o mjpeg.o
 OBJS-$(CONFIG_JPEGLS_ENCODER)          += jpeglsenc.o jpegls.o

From c1daf0723c49d331deecd63b4a6a328000821950 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 3 Jun 2011 19:34:43 +0200
Subject: [PATCH 566/830] j2kdec: don't fail on non zero cblock style.

This allows decoding to continue a bit further for some files.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/j2kdec.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libavcodec/j2kdec.c b/libavcodec/j2kdec.c
index 3156e90198..2497a2e015 100644
--- a/libavcodec/j2kdec.c
+++ b/libavcodec/j2kdec.c
@@ -285,8 +285,7 @@ static int get_cox(J2kDecoderContext *s, J2kCodingStyle *c)
 
     c->cblk_style = bytestream_get_byte(&s->buf);
     if (c->cblk_style != 0){ // cblk style
-        av_log(s->avctx, AV_LOG_ERROR, "no extra cblk styles supported\n");
-        return -1;
+        av_log(s->avctx, AV_LOG_WARNING, "extra cblk styles %X\n", c->cblk_style);
     }
     c->transform = bytestream_get_byte(&s->buf); // transformation
     if (c->csty & J2K_CSTY_PREC) {

From dc6632f1195c929a87ddf1b02d12b681c6de79ad Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 3 Jun 2011 15:50:14 +0100
Subject: [PATCH 567/830] build: simplify commands for clean target

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 subdir.mak | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/subdir.mak b/subdir.mak
index 7fdeddcceb..1d5824b022 100644
--- a/subdir.mak
+++ b/subdir.mak
@@ -50,12 +50,12 @@ endif
 
 clean::
 	$(RM) $(addprefix $(SUBDIR),*-example$(EXESUF) *-test$(EXESUF) $(CLEANFILES) $(CLEANSUFFIXES) $(LIBSUFFIXES)) \
-	    $(addprefix $(SUBDIR), $(foreach suffix,$(CLEANSUFFIXES),$(addsuffix /$(suffix),$(DIRS)))) \
+	    $(foreach dir,$(DIRS),$(CLEANSUFFIXES:%=$(SUBDIR)$(dir)/%)) \
 	    $(HOSTOBJS) $(HOSTPROGS)
 
 distclean:: clean
-	$(RM)  $(addprefix $(SUBDIR),$(DISTCLEANSUFFIXES)) \
-            $(addprefix $(SUBDIR), $(foreach suffix,$(DISTCLEANSUFFIXES),$(addsuffix /$(suffix),$(DIRS))))
+	$(RM) $(DISTCLEANSUFFIXES:%=$(SUBDIR)%) \
+	    $(foreach dir,$(DIRS),$(DISTCLEANSUFFIXES:%=$(SUBDIR)$(dir)/%))
 
 install-lib$(NAME)-shared: $(SUBDIR)$(SLIBNAME)
 	$(Q)mkdir -p "$(SHLIBDIR)"

From 49125aeddcfe1990ab4159d1b144b9387182452b Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 3 Jun 2011 21:14:30 +0200
Subject: [PATCH 568/830] oggdec: fix Ticket185

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/oggdec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index e335358601..655da35dd4 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -410,6 +410,8 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
             *fpos = os->sync_pos;
         os->pstart += os->psize;
         os->psize = 0;
+        if(os->pstart == os->bufpos)
+            os->bufpos = os->pstart = 0;
         os->sync_pos = os->page_pos;
     }
 

From 33af5335fd8e1b411bb1bdfad93b675033866c2c Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 4 Jun 2011 00:16:12 +0200
Subject: [PATCH 569/830] ffplay: error out with invalid sample rate or
 channels.

Fixes Ticket119
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffplay.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ffplay.c b/ffplay.c
index 5997f651e1..4139afb7cc 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -2158,6 +2158,10 @@ static int stream_component_open(VideoState *is, int stream_index)
 
     /* prepare audio output */
     if (avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
+        if(avctx->sample_rate <= 0 || avctx->channels <= 0){
+            fprintf(stderr, "Invalid sample rate or channel count\n");
+            return -1;
+        }
         wanted_spec.freq = avctx->sample_rate;
         wanted_spec.format = AUDIO_S16SYS;
         wanted_spec.channels = avctx->channels;

From bfd3b70ac3162f028bbfa5a8cd47d2715d49f77e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 4 Jun 2011 01:24:00 +0200
Subject: [PATCH 570/830] ffmpeg: fix segfault with too many output files

Fixes Ticket219

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffmpeg.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ffmpeg.c b/ffmpeg.c
index e5986a6a38..35436aa1f2 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -3863,6 +3863,11 @@ static void opt_output_file(const char *filename)
     AVFormatParameters params, *ap = &params;
     AVOutputFormat *file_oformat;
 
+    if(nb_output_files >= FF_ARRAY_ELEMS(output_files)){
+        fprintf(stderr, "Too many output files\n");
+        ffmpeg_exit(1);
+    }
+
     if (!strcmp(filename, "-"))
         filename = "pipe:";
 

From e4e2db9c74a10b2342297489edc00e99b10d5eb3 Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Fri, 3 Jun 2011 19:52:27 -0700
Subject: [PATCH 571/830] ffmpeg: use opt_acodec when setting audio codec in
 opt_target.

---
 ffmpeg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 35436aa1f2..34e61043b7 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -4180,7 +4180,7 @@ static int opt_target(const char *opt, const char *arg)
 
     if(!strcmp(arg, "vcd")) {
         opt_codec("vcodec", "mpeg1video");
-        opt_codec("vcodec", "mp2");
+        opt_codec("acodec", "mp2");
         opt_format("f", "vcd");
 
         opt_frame_size("s", norm == PAL ? "352x288" : "352x240");
@@ -4230,7 +4230,7 @@ static int opt_target(const char *opt, const char *arg)
     } else if(!strcmp(arg, "dvd")) {
 
         opt_codec("vcodec", "mpeg2video");
-        opt_codec("vcodec", "ac3");
+        opt_codec("acodec", "ac3");
         opt_format("f", "dvd");
 
         opt_frame_size("vcodec", norm == PAL ? "720x576" : "720x480");

From 124a9edb5f172ce36b11fd0d6ccc9f15cc51f322 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 4 Jun 2011 16:55:22 +0200
Subject: [PATCH 572/830] udp: support old, crappy non pthread mode

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 configure         | 2 +-
 libavformat/udp.c | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index c054f8174a..a1110bde52 100755
--- a/configure
+++ b/configure
@@ -1493,7 +1493,7 @@ mmst_protocol_deps="network"
 rtmp_protocol_select="tcp_protocol"
 rtp_protocol_select="udp_protocol"
 tcp_protocol_deps="network"
-udp_protocol_deps="network pthreads"
+udp_protocol_deps="network"
 
 # filters
 blackframe_filter_deps="gpl"
diff --git a/libavformat/udp.c b/libavformat/udp.c
index 7a01d29758..7c18fb7bf0 100644
--- a/libavformat/udp.c
+++ b/libavformat/udp.c
@@ -35,7 +35,11 @@
 #include "network.h"
 #include "os_support.h"
 #include "url.h"
+
+#if HAVE_PTHREADS
 #include <pthread.h>
+#endif
+
 #include <sys/time.h>
 
 #ifndef IPV6_ADD_MEMBERSHIP
@@ -58,7 +62,9 @@ typedef struct {
     int circular_buffer_size;
     AVFifoBuffer *fifo;
     int circular_buffer_error;
+#if HAVE_PTHREADS
     pthread_t circular_buffer_thread;
+#endif
 } UDPContext;
 
 #define UDP_TX_BUF_SIZE 32768
@@ -505,6 +511,7 @@ static int udp_open(URLContext *h, const char *uri, int flags)
 
     s->udp_fd = udp_fd;
 
+#if HAVE_PTHREADS
     if (!is_output && s->circular_buffer_size) {
         /* start the task going */
         s->fifo = av_fifo_alloc(s->circular_buffer_size);
@@ -513,6 +520,7 @@ static int udp_open(URLContext *h, const char *uri, int flags)
             goto fail;
         }
     }
+#endif
 
     return 0;
  fail:

From d0989bed226578a8f37ca90c78abc97abafb9794 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 13:36:35 +0100
Subject: [PATCH 573/830] Fix error check in av_file_map()

On failure, mmap() returns MAP_FAILED, which may or may not be -1.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/file.c b/libavutil/file.c
index 3dcce7c2f8..f0e48b5b2f 100644
--- a/libavutil/file.c
+++ b/libavutil/file.c
@@ -75,7 +75,7 @@ int av_file_map(const char *filename, uint8_t **bufptr, size_t *size,
 
 #if HAVE_MMAP
     ptr = mmap(NULL, *size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
-    if ((int)(ptr) == -1) {
+    if (ptr == MAP_FAILED) {
         err = AVERROR(errno);
         av_strerror(err, errbuf, sizeof(errbuf));
         av_log(&file_log_ctx, AV_LOG_ERROR, "Error occurred in mmap(): %s\n", errbuf);

From 91b4941c17bcf6bd4a0cfc7feb6d7c46868db433 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 16:43:41 +0100
Subject: [PATCH 574/830] musepack: remove extraneous mpcdata.h inclusions

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpc.h  | 2 --
 libavcodec/mpc8.c | 1 -
 2 files changed, 3 deletions(-)

diff --git a/libavcodec/mpc.h b/libavcodec/mpc.h
index eea4b6df36..f73c658f7d 100644
--- a/libavcodec/mpc.h
+++ b/libavcodec/mpc.h
@@ -35,8 +35,6 @@
 #include "dsputil.h"
 #include "mpegaudio.h"
 
-#include "mpcdata.h"
-
 #define BANDS            32
 #define SAMPLES_PER_BAND 36
 #define MPC_FRAME_SIZE   (BANDS * SAMPLES_PER_BAND)
diff --git a/libavcodec/mpc8.c b/libavcodec/mpc8.c
index 81de9cf500..3177faf1c4 100644
--- a/libavcodec/mpc8.c
+++ b/libavcodec/mpc8.c
@@ -33,7 +33,6 @@
 #include "libavutil/audioconvert.h"
 
 #include "mpc.h"
-#include "mpcdata.h"
 #include "mpc8data.h"
 #include "mpc8huff.h"
 

From 67ace7f0473ff968c7c615796b7a95c90fde68d2 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 16:45:15 +0100
Subject: [PATCH 575/830] musepack: remove unnecessary #include from mpcdata.h

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpcdata.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/mpcdata.h b/libavcodec/mpcdata.h
index ad06b7abca..397dad59d8 100644
--- a/libavcodec/mpcdata.h
+++ b/libavcodec/mpcdata.h
@@ -22,8 +22,6 @@
 #ifndef AVCODEC_MPCDATA_H
 #define AVCODEC_MPCDATA_H
 
-#include <stdint.h>
-
 static const float mpc_CC[18] = {
     65536.0000, 21845.3333, 13107.2000, 9362.2857, 7281.7778, 4369.0667, 2114.0645,
     1040.2539, 516.0315, 257.0039, 128.2505, 64.0626, 32.0156, 16.0039, 8.0010,

From 8d459acc10d865a82b1646f91a85ae99d998faa7 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 4 Jun 2011 16:58:31 +0200
Subject: [PATCH 576/830] Add missing #includes to make headers self-contained.

This fixes 'make checkheaders'.
---
 libavcodec/mathops.h         | 1 +
 libavcodec/mpc.h             | 1 +
 libavcodec/mpegaudiodectab.h | 2 ++
 libavcodec/put_bits.h        | 1 +
 libavcodec/tableprint.h      | 2 --
 libavformat/network.h        | 3 +++
 6 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index d74bc1ed70..98c9acf5ce 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -23,6 +23,7 @@
 #define AVCODEC_MATHOPS_H
 
 #include "libavutil/common.h"
+#include "config.h"
 
 #if   ARCH_ARM
 #   include "arm/mathops.h"
diff --git a/libavcodec/mpc.h b/libavcodec/mpc.h
index f73c658f7d..6d0f7b45bb 100644
--- a/libavcodec/mpc.h
+++ b/libavcodec/mpc.h
@@ -34,6 +34,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "mpegaudio.h"
+#include "mpegaudiodsp.h"
 
 #define BANDS            32
 #define SAMPLES_PER_BAND 36
diff --git a/libavcodec/mpegaudiodectab.h b/libavcodec/mpegaudiodectab.h
index 041d1860b7..1221657988 100644
--- a/libavcodec/mpegaudiodectab.h
+++ b/libavcodec/mpegaudiodectab.h
@@ -27,7 +27,9 @@
 #ifndef AVCODEC_MPEGAUDIODECTAB_H
 #define AVCODEC_MPEGAUDIODECTAB_H
 
+#include <stddef.h>
 #include <stdint.h>
+
 #include "mpegaudio.h"
 
 /*******************************************************/
diff --git a/libavcodec/put_bits.h b/libavcodec/put_bits.h
index c426540291..3849e6d339 100644
--- a/libavcodec/put_bits.h
+++ b/libavcodec/put_bits.h
@@ -34,6 +34,7 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
 #include "mathops.h"
+#include "config.h"
 
 //#define ALT_BITSTREAM_WRITER
 //#define ALIGNED_BITSTREAM_WRITER
diff --git a/libavcodec/tableprint.h b/libavcodec/tableprint.h
index ddf2635da0..de355fc0aa 100644
--- a/libavcodec/tableprint.h
+++ b/libavcodec/tableprint.h
@@ -26,8 +26,6 @@
 #include <inttypes.h>
 #include <stdio.h>
 
-#include "libavutil/common.h"
-
 #define WRITE_1D_FUNC_ARGV(type, linebrk, fmtstr, ...)\
 void write_##type##_array(const type *data, int len)\
 {\
diff --git a/libavformat/network.h b/libavformat/network.h
index db8466ce20..80d094a0de 100644
--- a/libavformat/network.h
+++ b/libavformat/network.h
@@ -21,7 +21,10 @@
 #ifndef AVFORMAT_NETWORK_H
 #define AVFORMAT_NETWORK_H
 
+#include <errno.h>
+
 #include "config.h"
+#include "libavutil/error.h"
 #include "os_support.h"
 
 #if HAVE_WINSOCK2_H

From 61ec024d39cc53672901791923e9cb1bd0fda9f3 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 4 Jun 2011 18:02:40 +0200
Subject: [PATCH 577/830] Skip headers not designed to work standalone during
 'make checkheaders'.

---
 libavcodec/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index ca5839f2af..77f285eda0 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -651,7 +651,8 @@ OBJS-$(!CONFIG_SMALL)                  += inverse.o
 
 -include $(SUBDIR)$(ARCH)/Makefile
 
-SKIPHEADERS                             = %_tablegen.h
+SKIPHEADERS                            += %_tablegen.h aac_tablegen_decl.h \
+                                          fft-internal.h $(ARCH)/vp56_arith.h
 SKIPHEADERS-$(CONFIG_DXVA2)            += dxva2.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_LIBDIRAC)         += libdirac.h
 SKIPHEADERS-$(CONFIG_LIBSCHROEDINGER)  += libschroedinger.h

From 42abb9a80da16d33ef7f54c3656b4d9524d03435 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 16:47:15 +0100
Subject: [PATCH 578/830] cmdutils: add missing const qualifier

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 cmdutils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmdutils.c b/cmdutils.c
index 31866b5563..b9a5d1b069 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -323,7 +323,7 @@ int opt_default(const char *opt, const char *arg){
         AVCodec *p = NULL;
         AVOutputFormat *oformat = NULL;
         while ((p=av_codec_next(p))){
-            AVClass *c= p->priv_class;
+            const AVClass *c = p->priv_class;
             if(c && av_find_opt(&c, opt, NULL, 0, 0))
                 break;
         }

From 1572484f62c59da6755fbcebf09821ada8c43728 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 3 Jun 2011 13:11:39 +0200
Subject: [PATCH 579/830] bktr: don't error when AVFormatParameters.time_base
 isn't set.

There's a private option for it now.
---
 libavdevice/bktr.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index 7cae48f5fd..6e19a61103 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -251,11 +251,6 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     AVRational fps;
     int ret = 0;
 
-    if (ap->time_base.den <= 0) {
-        ret = AVERROR(EINVAL);
-        goto out;
-    }
-
 #if FF_API_FORMAT_PARAMETERS
     if (ap->standard) {
         if (!strcasecmp(ap->standard, "pal"))

From a3b15e411d9624a37919e91fe81aec9c98457ed8 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 3 Jun 2011 13:29:43 +0200
Subject: [PATCH 580/830] fbdev,v4l2: remove some forgotten uses of
 AVFormatParameters.time_base.

---
 libavdevice/fbdev.c | 2 +-
 libavdevice/v4l2.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavdevice/fbdev.c b/libavdevice/fbdev.c
index 58b3ab4572..7e9ffe5c77 100644
--- a/libavdevice/fbdev.c
+++ b/libavdevice/fbdev.c
@@ -169,7 +169,7 @@ av_cold static int fbdev_read_header(AVFormatContext *avctx,
     st->codec->width      = fbdev->width;
     st->codec->height     = fbdev->heigth;
     st->codec->pix_fmt    = pix_fmt;
-    st->codec->time_base  = ap->time_base;
+    st->codec->time_base  = (AVRational){fbdev->fps.den, fbdev->fps.num};
     st->codec->bit_rate   =
         fbdev->width * fbdev->heigth * fbdev->bytes_per_pixel * av_q2d(fbdev->fps) * 8;
 
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 0cd4f38389..98ff82ec0d 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -546,6 +546,8 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
             return AVERROR(errno);
         }
     }
+    s1->streams[0]->codec->time_base.den = tpf->denominator;
+    s1->streams[0]->codec->time_base.num = tpf->numerator;
 
     return 0;
 }
@@ -679,8 +681,6 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     st->codec->codec_id = codec_id;
     st->codec->width = s->width;
     st->codec->height = s->height;
-    st->codec->time_base.den = ap->time_base.den;
-    st->codec->time_base.num = ap->time_base.num;
     st->codec->bit_rate = s->frame_size * 1/av_q2d(st->codec->time_base) * 8;
 
 out:

From ff494cbdafbaee57275e9227a8a2aff02184c346 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 3 Jun 2011 13:36:36 +0200
Subject: [PATCH 581/830] x11grab: add framerate private option.

---
 libavdevice/x11grab.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c
index d4fcbca873..b1ca6e699a 100644
--- a/libavdevice/x11grab.c
+++ b/libavdevice/x11grab.c
@@ -71,6 +71,7 @@ struct x11_grab
     int use_shm;             /**< !0 when using XShm extension */
     XShmSegmentInfo shminfo; /**< When using XShm, keeps track of XShm infos */
     int nomouse;
+    char *framerate;         /**< Set by a private option. */
 };
 
 /**
@@ -97,6 +98,7 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     int use_shm;
     char *param, *offset;
     int ret = 0;
+    AVRational framerate;
 
     param = av_strdup(s1->filename);
     offset = strchr(param, '+');
@@ -110,11 +112,17 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
         goto out;
     }
+    if ((ret = av_parse_video_rate(&framerate, x11grab->framerate)) < 0) {
+        av_log(s1, AV_LOG_ERROR, "Could not parse framerate: %s.\n", x11grab->framerate);
+        goto out;
+    }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->width > 0)
         x11grab->width = ap->width;
     if (ap->height > 0)
         x11grab->height = ap->height;
+    if (ap->time_base.num)
+        framerate = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
     av_log(s1, AV_LOG_INFO, "device: %s -> display: %s x: %d y: %d width: %d height: %d\n",
            s1->filename, param, x_off, y_off, x11grab->width, x11grab->height);
@@ -126,12 +134,6 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         goto out;
     }
 
-    if (ap->time_base.den <= 0) {
-        av_log(s1, AV_LOG_ERROR, "AVParameters don't have video size and/or rate. Use -s and -r.\n");
-        ret = AVERROR(EINVAL);
-        goto out;
-    }
-
     st = av_new_stream(s1, 0);
     if (!st) {
         ret = AVERROR(ENOMEM);
@@ -240,8 +242,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 
     x11grab->frame_size = x11grab->width * x11grab->height * image->bits_per_pixel/8;
     x11grab->dpy = dpy;
-    x11grab->time_base  = ap->time_base;
-    x11grab->time_frame = av_gettime() / av_q2d(ap->time_base);
+    x11grab->time_base  = (AVRational){framerate.den, framerate.num};
+    x11grab->time_frame = av_gettime() / av_q2d(x11grab->time_base);
     x11grab->x_off = x_off;
     x11grab->y_off = y_off;
     x11grab->image = image;
@@ -252,11 +254,12 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     st->codec->width  = x11grab->width;
     st->codec->height = x11grab->height;
     st->codec->pix_fmt = input_pixfmt;
-    st->codec->time_base = ap->time_base;
-    st->codec->bit_rate = x11grab->frame_size * 1/av_q2d(ap->time_base) * 8;
+    st->codec->time_base = x11grab->time_base;
+    st->codec->bit_rate = x11grab->frame_size * 1/av_q2d(x11grab->time_base) * 8;
 
 out:
     av_freep(&x11grab->video_size);
+    av_freep(&x11grab->framerate);
     return ret;
 }
 
@@ -468,6 +471,7 @@ x11grab_read_close(AVFormatContext *s1)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "vga"}, 0, 0, DEC },
+    { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "ntsc"}, 0, 0, DEC },
     { NULL },
 };
 

From e762b1ce9505511b1ecbbd4868ec6879e32e8831 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 3 Jun 2011 14:13:14 +0200
Subject: [PATCH 582/830] rawdec: add framerate private option.

---
 libavformat/ingenientdec.c |  3 ++-
 libavformat/rawdec.c       | 48 ++++++++++++++++++++++++--------------
 libavformat/rawdec.h       |  3 +++
 3 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/libavformat/ingenientdec.c b/libavformat/ingenientdec.c
index 7407a04dd9..febeb2ec35 100644
--- a/libavformat/ingenientdec.c
+++ b/libavformat/ingenientdec.c
@@ -61,11 +61,12 @@ static int ingenient_read_packet(AVFormatContext *s, AVPacket *pkt)
 AVInputFormat ff_ingenient_demuxer = {
     "ingenient",
     NULL_IF_CONFIG_SMALL("raw Ingenient MJPEG"),
-    0,
+    sizeof(FFRawVideoDemuxerContext),
     NULL,
     ff_raw_video_read_header,
     ingenient_read_packet,
     .flags= AVFMT_GENERIC_INDEX,
     .extensions = "cgi", // FIXME
     .value = CODEC_ID_MJPEG,
+    .priv_class = &ff_rawvideo_demuxer_class,
 };
diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index 3d8125f42c..a92200d844 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -72,11 +72,8 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
             FFRawVideoDemuxerContext *s1 = s->priv_data;
             int width = 0, height = 0, ret = 0;
             enum PixelFormat pix_fmt;
+            AVRational framerate;
 
-            if(ap->time_base.num)
-                av_set_pts_info(st, 64, ap->time_base.num, ap->time_base.den);
-            else
-                av_set_pts_info(st, 64, 1, 25);
             if (s1->video_size && (ret = av_parse_video_size(&width, &height, s1->video_size)) < 0) {
                 av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n");
                 goto fail;
@@ -86,6 +83,10 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
                 ret = AVERROR(EINVAL);
                 goto fail;
             }
+            if ((ret = av_parse_video_rate(&framerate, s1->framerate)) < 0) {
+                av_log(s, AV_LOG_ERROR, "Could not parse framerate: %s.\n", s1->framerate);
+                goto fail;
+            }
 #if FF_API_FORMAT_PARAMETERS
             if (ap->width > 0)
                 width = ap->width;
@@ -93,13 +94,17 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
                 height = ap->height;
             if (ap->pix_fmt)
                 pix_fmt = ap->pix_fmt;
+            if (ap->time_base.num)
+                framerate = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
+            av_set_pts_info(st, 64, framerate.den, framerate.num);
             st->codec->width  = width;
             st->codec->height = height;
             st->codec->pix_fmt = pix_fmt;
 fail:
             av_freep(&s1->video_size);
             av_freep(&s1->pixel_format);
+            av_freep(&s1->framerate);
             return ret;
             }
         default:
@@ -149,30 +154,36 @@ int ff_raw_video_read_header(AVFormatContext *s,
                              AVFormatParameters *ap)
 {
     AVStream *st;
+    FFRawVideoDemuxerContext *s1 = s->priv_data;
+    AVRational framerate;
+    int ret = 0;
+
 
     st = av_new_stream(s, 0);
-    if (!st)
-        return AVERROR(ENOMEM);
+    if (!st) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
 
     st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
     st->codec->codec_id = s->iformat->value;
     st->need_parsing = AVSTREAM_PARSE_FULL;
 
-    /* for MJPEG, specify frame rate */
-    /* for MPEG-4 specify it, too (most MPEG-4 streams do not have the fixed_vop_rate set ...)*/
-    if (ap->time_base.num) {
-        st->codec->time_base= ap->time_base;
-    } else if ( st->codec->codec_id == CODEC_ID_MJPEG ||
-                st->codec->codec_id == CODEC_ID_MPEG4 ||
-                st->codec->codec_id == CODEC_ID_DIRAC ||
-                st->codec->codec_id == CODEC_ID_DNXHD ||
-                st->codec->codec_id == CODEC_ID_VC1   ||
-                st->codec->codec_id == CODEC_ID_H264) {
-        st->codec->time_base= (AVRational){1,25};
+    if ((ret = av_parse_video_rate(&framerate, s1->framerate)) < 0) {
+        av_log(s, AV_LOG_ERROR, "Could not parse framerate: %s.\n", s1->framerate);
+        goto fail;
     }
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->time_base.num)
+        framerate = (AVRational){ap->time_base.den, ap->time_base.num};
+#endif
+
+    st->codec->time_base = (AVRational){framerate.den, framerate.num};
     av_set_pts_info(st, 64, 1, 1200000);
 
-    return 0;
+fail:
+    av_freep(&s1->framerate);
+    return ret;
 }
 
 /* Note: Do not forget to add new entries to the Makefile as well. */
@@ -195,6 +206,7 @@ const AVClass ff_rawaudio_demuxer_class = {
 static const AVOption video_options[] = {
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = "yuv420p"}, 0, 0, DEC },
+    { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "25"}, 0, 0, DEC },
     { NULL },
 };
 #undef OFFSET
diff --git a/libavformat/rawdec.h b/libavformat/rawdec.h
index 4968915cc6..76e8053f6d 100644
--- a/libavformat/rawdec.h
+++ b/libavformat/rawdec.h
@@ -35,6 +35,7 @@ typedef struct FFRawVideoDemuxerContext {
     const AVClass *class;     /**< Class for private options. */
     char *video_size;         /**< String describing video size, set by a private option. */
     char *pixel_format;       /**< Set by a private option. */
+    char *framerate;          /**< String describing framerate, set by a private option. */
 } FFRawVideoDemuxerContext;
 
 extern const AVClass ff_rawaudio_demuxer_class;
@@ -58,6 +59,8 @@ AVInputFormat ff_ ## shortname ## _demuxer = {\
     .extensions     = ext,\
     .flags          = AVFMT_GENERIC_INDEX,\
     .value          = id,\
+    .priv_data_size = sizeof(FFRawVideoDemuxerContext),\
+    .priv_class     = &ff_rawvideo_demuxer_class,\
 };
 
 #endif /* AVFORMAT_RAWDEC_H */

From 8346f60afbb23b9a3dcef8e6683060f71ec296e2 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 3 Jun 2011 20:43:48 +0200
Subject: [PATCH 583/830] tty: factorise returning error codes.

---
 libavformat/tty.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/libavformat/tty.c b/libavformat/tty.c
index 432fcc0c3d..9dada16b8a 100644
--- a/libavformat/tty.c
+++ b/libavformat/tty.c
@@ -73,21 +73,20 @@ static int read_header(AVFormatContext *avctx,
                        AVFormatParameters *ap)
 {
     TtyDemuxContext *s = avctx->priv_data;
-    int width = 0, height = 0, ret;
+    int width = 0, height = 0, ret = 0;
     AVStream *st = av_new_stream(avctx, 0);
-    if (!st)
-        return AVERROR(ENOMEM);
+
+    if (!st) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
     st->codec->codec_tag   = 0;
     st->codec->codec_type  = AVMEDIA_TYPE_VIDEO;
     st->codec->codec_id    = CODEC_ID_ANSI;
 
-    if (s->video_size) {
-        ret = av_parse_video_size(&width, &height, s->video_size);
-        av_freep(&s->video_size);
-        if (ret < 0) {
-            av_log (avctx, AV_LOG_ERROR, "Couldn't parse video size.\n");
-            return ret;
-        }
+    if (s->video_size && (ret = av_parse_video_size(&width, &height, s->video_size)) < 0) {
+        av_log (avctx, AV_LOG_ERROR, "Couldn't parse video size.\n");
+        goto fail;
     }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->width > 0)
@@ -121,7 +120,9 @@ static int read_header(AVFormatContext *avctx,
         avio_seek(avctx->pb, 0, SEEK_SET);
     }
 
-    return 0;
+fail:
+    av_freep(&s->video_size);
+    return ret;
 }
 
 static int read_packet(AVFormatContext *avctx, AVPacket *pkt)

From 95912731c2eb37b538a767b2ff2b8149b16f0bde Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 17:52:30 +0100
Subject: [PATCH 584/830] mathops: use MUL64 macro where it forms part of other
 ops

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mathops.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index 98c9acf5ce..ec76eaae29 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -41,13 +41,17 @@
 
 /* generic implementation */
 
+#ifndef MUL64
+#   define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
+#endif
+
 #ifndef MULL
-#   define MULL(a,b,s) (((int64_t)(a) * (int64_t)(b)) >> (s))
+#   define MULL(a,b,s) (MUL64(a, b) >> (s))
 #endif
 
 #ifndef MULH
 static av_always_inline int MULH(int a, int b){
-    return ((int64_t)(a) * (int64_t)(b))>>32;
+    return MUL64(a, b) >> 32;
 }
 #endif
 
@@ -57,10 +61,6 @@ static av_always_inline unsigned UMULH(unsigned a, unsigned b){
 }
 #endif
 
-#ifndef MUL64
-#   define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
-#endif
-
 #ifndef MAC64
 #   define MAC64(d, a, b) ((d) += MUL64(a, b))
 #endif

From 594fbe42c6b21aef76b938ce97524fa92a48e7a0 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 21:16:04 +0100
Subject: [PATCH 585/830] ARM: remove MULL inline asm

Reasonable gcc versions get this one right on their own.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/mathops.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index 7c2acca2e8..3870fce3e2 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -28,18 +28,6 @@
 
 #if HAVE_INLINE_ASM
 
-#   define MULL MULL
-static inline av_const int MULL(int a, int b, unsigned shift)
-{
-    int lo, hi;
-    __asm__("smull %0, %1, %2, %3     \n\t"
-            "mov   %0, %0,     lsr %4 \n\t"
-            "add   %1, %0, %1, lsl %5 \n\t"
-            : "=&r"(lo), "=&r"(hi)
-            : "r"(b), "r"(a), "ir"(shift), "ir"(32-shift));
-    return hi;
-}
-
 #define MULH MULH
 #define MUL64 MUL64
 

From 35d5cb1ab4dab657ef31b845893cf98d885c8add Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 1 Jun 2011 12:40:01 -0400
Subject: [PATCH 586/830] ac3enc: extract all exponents for the frame at once

---
 libavcodec/ac3enc.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index dbe7784eae..53f6251a5e 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -900,15 +900,11 @@ static av_cold void exponent_init(AC3EncodeContext *s)
  */
 static void extract_exponents(AC3EncodeContext *s)
 {
-    int blk, ch;
+    int ch        = !s->cpl_on;
+    int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS * (s->channels - ch + 1);
+    AC3Block *block = &s->blocks[0];
 
-    for (ch = !s->cpl_on; ch <= s->channels; ch++) {
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-            AC3Block *block = &s->blocks[blk];
-            s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch],
-                                        AC3_MAX_COEFS);
-        }
-    }
+    s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch], chan_size);
 }
 
 

From ec4207c46ae3f9a1add21dbe3eee124bb2b10eee Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 5 Jun 2011 04:24:45 +0200
Subject: [PATCH 587/830] swscale: factor should_dither out

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index d2b3337dc3..29c26e87ac 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1730,6 +1730,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
     int lastDstY;
     uint32_t *pal=c->pal_yuv;
+    int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
 
     /* vars which will change and which we need to store back in the context */
     int dstY= c->dstY;
@@ -1787,8 +1788,8 @@ static int swScale(SwsContext *c, const uint8_t* src[],
         unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
         unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
         unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
-        const uint8_t *lumDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][dstY   &7] : flat64;
-        const uint8_t *chrDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][chrDstY&7] : flat64;
+        const uint8_t *lumDither= should_dither ? dithers[7][dstY   &7] : flat64;
+        const uint8_t *chrDither= should_dither ? dithers[7][chrDstY&7] : flat64;
 
         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];

From ba91bf58cd8bab4de55ec31ffcdf6cc71f7e5e42 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 5 Jun 2011 04:31:36 +0200
Subject: [PATCH 588/830] swscale: override the lack of the accurate rounding
 flag when needed for dither.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/x86/swscale_template.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index ccd40521a7..58c6bdeee4 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2366,7 +2366,8 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
             default: break;
             }
         } else {
-            c->yuv2yuv1     = RENAME(yuv2yuv1    );
+            int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
+            c->yuv2yuv1     = should_dither ? RENAME(yuv2yuv1_ar    ) : RENAME(yuv2yuv1    );
             c->yuv2yuvX     = RENAME(yuv2yuvX    );
             switch (c->dstFormat) {
             case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;

From dcb73592f4f938eb2fd121f434236d5065ff26b6 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@collabora.co.uk>
Date: Sun, 5 Jun 2011 10:31:24 +0200
Subject: [PATCH 589/830] configure: Document --enable-vaapi

VAAPI is disabled by default so it should have a --enable-vaapi option
documented, not a --disable-vaapi.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 805e4ba50a..fe5b700db5 100755
--- a/configure
+++ b/configure
@@ -106,7 +106,7 @@ Configuration options:
   --disable-lpc            disable LPC code
   --disable-mdct           disable MDCT code
   --disable-rdft           disable RDFT code
-  --disable-vaapi          disable VAAPI code
+  --enable-vaapi           enable VAAPI code
   --disable-vdpau          disable VDPAU code
   --disable-dxva2          disable DXVA2 code
   --enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)

From 5a4a71257c0d0b1f4a5b4f0ebd4d96515feea23a Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 18:48:46 +0100
Subject: [PATCH 590/830] Fix build of eval-test program

eval.c has moved to libavutil; move the TESTPROGS entry too.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/Makefile | 2 +-
 libavutil/Makefile  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 77f285eda0..ce96950301 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -662,7 +662,7 @@ SKIPHEADERS-$(CONFIG_XVMC)             += xvmc.h
 
 EXAMPLES = api
 
-TESTPROGS = cabac dct eval fft fft-fixed h264 iirfilter rangecoder snow
+TESTPROGS = cabac dct fft fft-fixed h264 iirfilter rangecoder snow
 TESTPROGS-$(HAVE_MMX) += motion
 TESTOBJS = dctref.o
 
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 1386ebb190..01231bd52d 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -75,7 +75,7 @@ OBJS-$(ARCH_ARM) += arm/cpu.o
 OBJS-$(ARCH_PPC) += ppc/cpu.o
 OBJS-$(ARCH_X86) += x86/cpu.o
 
-TESTPROGS = adler32 aes base64 cpu crc des lls md5 pca sha tree
+TESTPROGS = adler32 aes base64 cpu crc des eval lls md5 pca sha tree
 TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo
 
 DIRS = arm bfin sh4 x86

From a43458d7eb32c2cc560c58c78949f152a8e95e3d Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 9 May 2011 22:11:57 +0200
Subject: [PATCH 591/830] tiff: fix linesize for mono-white/black formats.

Fix decoding of file Test_1bpp.tif

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/tiff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index bea353275f..eaaeb84425 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -105,7 +105,7 @@ static int tiff_uncompress(uint8_t *dst, unsigned long *len, const uint8_t *src,
 static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uint8_t *src, int size, int lines){
     int c, line, pixels, code;
     const uint8_t *ssrc = src;
-    int width = s->width * s->bpp >> 3;
+    int width = ((s->width * s->bpp) + 7) >> 3;
 #if CONFIG_ZLIB
     uint8_t *zbuf; unsigned long outlen;
 

From 1863a3c7aa897b2077e408ab0bdc57cb3a13d5f3 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 9 May 2011 21:26:39 +0200
Subject: [PATCH 592/830] tiff: print log in case of unknown / unsupported tag.

Helps debugging.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/tiff.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index eaaeb84425..08cd3b0a6f 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -478,6 +478,8 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
         if(s->compr == TIFF_G4)
             s->fax_opts = value;
         break;
+    default:
+        av_log(s->avctx, AV_LOG_DEBUG, "Unknown or unsupported tag %d/0X%0X\n", tag, tag);
     }
     return 0;
 }

From 0d0fdb0ad59c0533fb91dad00379f762573ce541 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 5 Jun 2011 00:57:05 +0200
Subject: [PATCH 593/830] sdl: use the filename for defining the window title,
 if not specified

This allows more efficient use of the command line.
---
 doc/outdevs.texi       | 9 +++------
 libavdevice/avdevice.h | 2 +-
 libavdevice/sdl.c      | 4 +++-
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/doc/outdevs.texi b/doc/outdevs.texi
index 9985900afc..79619f80b9 100644
--- a/doc/outdevs.texi
+++ b/doc/outdevs.texi
@@ -45,8 +45,8 @@ For more information about SDL, check:
 @table @option
 
 @item window_title
-Set the SDL window title, if not specified default to "SDL video
-outdev".
+Set the SDL window title, if not specified default to the filename
+specified for the output device.
 
 @item icon_title
 Set the name of the iconified SDL window, if not specified it is set
@@ -63,12 +63,9 @@ If not specified it defaults to the size of the input video.
 The following command shows the @file{ffmpeg} output is an
 SDL window, forcing its size to the qcif format:
 @example
-ffmpeg -i INPUT -vcodec rawvideo -pix_fmt yuv420p -window_size qcif -f sdl none
+ffmpeg -i INPUT -vcodec rawvideo -pix_fmt yuv420p -window_size qcif -f sdl "SDL output"
 @end example
 
-Note that the name specified for the output device is ignored, so it
-can be set to an arbitrary value ("none" in the above example).
-
 @section sndio
 
 sndio audio output device.
diff --git a/libavdevice/avdevice.h b/libavdevice/avdevice.h
index be56be48d7..e49e5b71f7 100644
--- a/libavdevice/avdevice.h
+++ b/libavdevice/avdevice.h
@@ -24,7 +24,7 @@
 
 #define LIBAVDEVICE_VERSION_MAJOR 53
 #define LIBAVDEVICE_VERSION_MINOR  1
-#define LIBAVDEVICE_VERSION_MICRO  0
+#define LIBAVDEVICE_VERSION_MICRO  1
 
 #define LIBAVDEVICE_VERSION_INT AV_VERSION_INT(LIBAVDEVICE_VERSION_MAJOR, \
                                                LIBAVDEVICE_VERSION_MINOR, \
diff --git a/libavdevice/sdl.c b/libavdevice/sdl.c
index 07f60cd648..36f65f2395 100644
--- a/libavdevice/sdl.c
+++ b/libavdevice/sdl.c
@@ -78,6 +78,8 @@ static int sdl_write_header(AVFormatContext *s)
     float sar, dar; /* sample and display aspect ratios */
     int i, ret;
 
+    if (!sdl->window_title)
+        sdl->window_title = av_strdup(s->filename);
     if (!sdl->icon_title)
         sdl->icon_title = av_strdup(sdl->window_title);
 
@@ -201,7 +203,7 @@ static int sdl_write_packet(AVFormatContext *s, AVPacket *pkt)
 #define OFFSET(x) offsetof(SDLContext,x)
 
 static const AVOption options[] = {
-    { "window_title", "SDL window title",           OFFSET(window_title),  FF_OPT_TYPE_STRING, {.str = "SDL video outdev" }, 0,  0, AV_OPT_FLAG_ENCODING_PARAM },
+    { "window_title", "SDL window title",           OFFSET(window_title),  FF_OPT_TYPE_STRING, {.str = NULL }, 0,  0, AV_OPT_FLAG_ENCODING_PARAM },
     { "icon_title",   "SDL iconified window title", OFFSET(icon_title)  ,  FF_OPT_TYPE_STRING, {.str = NULL },               0,  0, AV_OPT_FLAG_ENCODING_PARAM },
     { "window_size",  "SDL window forced size",     OFFSET(window_size) ,  FF_OPT_TYPE_STRING, {.str = NULL },               0,  0, AV_OPT_FLAG_ENCODING_PARAM },
     { NULL },

From 96f931adf75967dc86fbf3ee21517e539d0a6e50 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 4 Jun 2011 23:54:19 +0200
Subject: [PATCH 594/830] cmdutils: change the signature of the function
 argument in parse_options()

This is required for a pending simplification.
---
 cmdutils.c | 8 +++++---
 cmdutils.h | 2 +-
 ffmpeg.c   | 3 ++-
 ffplay.c   | 3 ++-
 ffprobe.c  | 3 ++-
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/cmdutils.c b/cmdutils.c
index 2bc6b7417f..2bf4e03ac8 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -217,7 +217,7 @@ static inline void prepare_app_arguments(int *argc_ptr, char ***argv_ptr)
 #endif /* WIN32 && !__MINGW32CE__ */
 
 void parse_options(int argc, char **argv, const OptionDef *options,
-                   void (* parse_arg_function)(const char*))
+                   int (* parse_arg_function)(const char *opt, const char *arg))
 {
     const char *opt, *arg;
     int optindex, handleoptions=1;
@@ -284,8 +284,10 @@ unknown_opt:
             if(po->flags & OPT_EXIT)
                 exit(0);
         } else {
-            if (parse_arg_function)
-                parse_arg_function(opt);
+            if (parse_arg_function) {
+                if (parse_arg_function(NULL, opt) < 0)
+                    exit(1);
+            }
         }
     }
 }
diff --git a/cmdutils.h b/cmdutils.h
index eca98a3cf4..171ef3cb5b 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -151,7 +151,7 @@ void show_help_options(const OptionDef *options, const char *msg, int mask, int
  * not have to be processed.
  */
 void parse_options(int argc, char **argv, const OptionDef *options,
-                   void (* parse_arg_function)(const char*));
+                   int (* parse_arg_function)(const char *opt, const char *arg));
 
 void set_context_opts(void *ctx, void *opts_ctx, int flags, AVCodec *codec);
 
diff --git a/ffmpeg.c b/ffmpeg.c
index 99546f7ae0..3cfd5ca060 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -3855,7 +3855,7 @@ static int opt_streamid(const char *opt, const char *arg)
     return 0;
 }
 
-static void opt_output_file(const char *filename)
+static int opt_output_file(const char *opt, const char *filename)
 {
     AVFormatContext *oc;
     int err, use_video, use_audio, use_subtitle, use_data;
@@ -3984,6 +3984,7 @@ static void opt_output_file(const char *filename)
     av_freep(&forced_key_frames);
     uninit_opts();
     init_opts();
+    return 0;
 }
 
 /* same option as mencoder */
diff --git a/ffplay.c b/ffplay.c
index 4139afb7cc..48f5f144d7 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -3006,7 +3006,7 @@ static void show_help(void)
            );
 }
 
-static void opt_input_file(const char *filename)
+static int opt_input_file(const char *opt, const char *filename)
 {
     if (input_filename) {
         fprintf(stderr, "Argument '%s' provided as input filename, but '%s' was already specified.\n",
@@ -3016,6 +3016,7 @@ static void opt_input_file(const char *filename)
     if (!strcmp(filename, "-"))
         filename = "pipe:";
     input_filename = filename;
+    return 0;
 }
 
 /* Called from the main */
diff --git a/ffprobe.c b/ffprobe.c
index 57e2a9bc06..44252e59da 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -339,7 +339,7 @@ static int opt_format(const char *opt, const char *arg)
     return 0;
 }
 
-static void opt_input_file(const char *arg)
+static int opt_input_file(const char *opt, const char *arg)
 {
     if (input_filename) {
         fprintf(stderr, "Argument '%s' provided as input filename, but '%s' was already specified.\n",
@@ -349,6 +349,7 @@ static void opt_input_file(const char *arg)
     if (!strcmp(arg, "-"))
         arg = "pipe:";
     input_filename = arg;
+    return 0;
 }
 
 static void show_help(void)

From b4af3cf3470a14626e5d2f40ce3c88bfdd0c8561 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 5 Jun 2011 00:03:59 +0200
Subject: [PATCH 595/830] cmdutils: remove unnecessary OPT_DUMMY implementation

The -i INPUT option can be implemented more cleanly by using a
function option, which can easily be done now that the
parse_arg_function passed to parse_options has a standard signature.
---
 cmdutils.c |  2 --
 cmdutils.h |  1 -
 ffplay.c   | 28 ++++++++++++++--------------
 3 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/cmdutils.c b/cmdutils.c
index 2bf4e03ac8..4f27f50f40 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -273,8 +273,6 @@ unknown_opt:
                 *po->u.int64_arg = parse_number_or_die(opt, arg, OPT_INT64, INT64_MIN, INT64_MAX);
             } else if (po->flags & OPT_FLOAT) {
                 *po->u.float_arg = parse_number_or_die(opt, arg, OPT_FLOAT, -INFINITY, INFINITY);
-            } else if (po->flags & OPT_DUMMY) {
-                /* Do nothing for this option */
             } else {
                 if (po->u.func_arg(opt, arg) < 0) {
                     fprintf(stderr, "%s: failed to set value '%s' for option '%s'\n", argv[0], arg, opt);
diff --git a/cmdutils.h b/cmdutils.h
index 171ef3cb5b..5fd398d054 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -128,7 +128,6 @@ typedef struct {
 #define OPT_INT64  0x0400
 #define OPT_EXIT   0x0800
 #define OPT_DATA   0x1000
-#define OPT_DUMMY  0x2000
      union {
         int *int_arg;
         char **str_arg;
diff --git a/ffplay.c b/ffplay.c
index 48f5f144d7..3ebcd59c1c 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -2916,6 +2916,19 @@ static int opt_show_mode(const char *opt, const char *arg)
     return 0;
 }
 
+static int opt_input_file(const char *opt, const char *filename)
+{
+    if (input_filename) {
+        fprintf(stderr, "Argument '%s' provided as input filename, but '%s' was already specified.\n",
+                filename, input_filename);
+        exit(1);
+    }
+    if (!strcmp(filename, "-"))
+        filename = "pipe:";
+    input_filename = filename;
+    return 0;
+}
+
 static const OptionDef options[] = {
 #include "cmdutils_common_opts.h"
     { "x", HAS_ARG, {(void*)opt_width}, "force displayed width", "width" },
@@ -2961,7 +2974,7 @@ static const OptionDef options[] = {
     { "rdftspeed", OPT_INT | HAS_ARG| OPT_AUDIO | OPT_EXPERT, {(void*)&rdftspeed}, "rdft speed", "msecs" },
     { "showmode", HAS_ARG, {(void*)opt_show_mode}, "select show mode (0 = video, 1 = waves, 2 = RDFT)", "mode" },
     { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
-    { "i", OPT_DUMMY, {NULL}, "ffmpeg compatibility dummy option", ""},
+    { "i", HAS_ARG, {(void *)opt_input_file}, "read specified file", "input_file"},
     { NULL, },
 };
 
@@ -3006,19 +3019,6 @@ static void show_help(void)
            );
 }
 
-static int opt_input_file(const char *opt, const char *filename)
-{
-    if (input_filename) {
-        fprintf(stderr, "Argument '%s' provided as input filename, but '%s' was already specified.\n",
-                filename, input_filename);
-        exit(1);
-    }
-    if (!strcmp(filename, "-"))
-        filename = "pipe:";
-    input_filename = filename;
-    return 0;
-}
-
 /* Called from the main */
 int main(int argc, char **argv)
 {

From 9b032c65547a0c396902c101e7a5efdabb3025b9 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 5 Jun 2011 00:06:20 +0200
Subject: [PATCH 596/830] ffplay.texi: document -i FILE option

---
 doc/ffplay.texi | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/ffplay.texi b/doc/ffplay.texi
index 7b99666f1f..de942bb771 100644
--- a/doc/ffplay.texi
+++ b/doc/ffplay.texi
@@ -13,7 +13,7 @@
 
 @example
 @c man begin SYNOPSIS
-ffplay [options] @file{input_file}
+ffplay [options] [@file{input_file}]
 @c man end
 @end example
 
@@ -82,6 +82,8 @@ the input video.
 Use the option "-filters" to show all the available filters (including
 also sources and sinks).
 
+@item -i @var{input_file}
+Read @var{input_file}.
 @end table
 
 @section Advanced options

From 92b4abc2ad88ac7e89a60480094b8398fe1b8dbf Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 5 Jun 2011 01:06:13 +0100
Subject: [PATCH 597/830] tableprint: Restore mistakenly deleted common.h
 #include for FF_ARRAY_ELEMS.

This fixes the build with hardcoded tables enabled.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/tableprint.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/tableprint.h b/libavcodec/tableprint.h
index de355fc0aa..ddf2635da0 100644
--- a/libavcodec/tableprint.h
+++ b/libavcodec/tableprint.h
@@ -26,6 +26,8 @@
 #include <inttypes.h>
 #include <stdio.h>
 
+#include "libavutil/common.h"
+
 #define WRITE_1D_FUNC_ARGV(type, linebrk, fmtstr, ...)\
 void write_##type##_array(const type *data, int len)\
 {\

From cea87fb2c38f175b9a0bd2a46617c155e67d4794 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 5 Jun 2011 00:09:04 +0200
Subject: [PATCH 598/830] ffprobe: implement -i FILE option

Useful for mimicking the ffmpeg -i FILE syntax.
---
 doc/ffprobe.texi | 3 +++
 ffprobe.c        | 1 +
 2 files changed, 4 insertions(+)

diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi
index ee4ffb9671..6f7e83b267 100644
--- a/doc/ffprobe.texi
+++ b/doc/ffprobe.texi
@@ -108,6 +108,9 @@ multimedia stream.
 Each media stream information is printed within a dedicated section
 with name "STREAM".
 
+@item -i @var{input_file}
+Read @var{input_file}.
+
 @end table
 @c man end
 
diff --git a/ffprobe.c b/ffprobe.c
index 44252e59da..3139c28774 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -385,6 +385,7 @@ static const OptionDef options[] = {
     { "show_packets", OPT_BOOL, {(void*)&do_show_packets}, "show packets info" },
     { "show_streams", OPT_BOOL, {(void*)&do_show_streams}, "show streams info" },
     { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
+    { "i", HAS_ARG, {(void *)opt_input_file}, "read specified file", "input_file"},
     { NULL, },
 };
 

From e4841a404bdabfeafb917454d510b60d888cb761 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 3 Jun 2011 19:03:44 +0200
Subject: [PATCH 599/830] ffmpeg: fix massive leak occurring when seeking

Avoid adding frames to the vsrc_buffer when ist->pts < start_time,
as these frames are unused (and never released). In particular, this
condition occurs with commands of the form:
ffmpeg -i INPUT -ss TIME OUTPUT

This also allows a minor simplification.
---
 ffmpeg.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 3cfd5ca060..fb619e7777 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1656,21 +1656,6 @@ static int output_packet(AVInputStream *ist, int ist_index,
             avpkt.size = 0;
         }
 
-#if CONFIG_AVFILTER
-        if(ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
-            for(i=0;i<nb_ostreams;i++) {
-                ost = ost_table[i];
-                if (ost->input_video_filter && ost->source_index == ist_index) {
-                    if (!picture.sample_aspect_ratio.num)
-                        picture.sample_aspect_ratio = ist->st->sample_aspect_ratio;
-                    picture.pts = ist->pts;
-
-                    av_vsrc_buffer_add_frame(ost->input_video_filter, &picture);
-                }
-            }
-        }
-#endif
-
         // preprocess audio (volume)
         if (ist->st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
             if (audio_volume != 256) {
@@ -1701,6 +1686,13 @@ static int output_packet(AVInputStream *ist, int ist_index,
                 ost = ost_table[i];
                 if (ost->source_index == ist_index) {
 #if CONFIG_AVFILTER
+                if (ost->input_video_filter) {
+                    if (!picture.sample_aspect_ratio.num)
+                         picture.sample_aspect_ratio = ist->st->sample_aspect_ratio;
+                    picture.pts = ist->pts;
+
+                    av_vsrc_buffer_add_frame(ost->input_video_filter, &picture);
+                }
                 frame_available = ist->st->codec->codec_type != AVMEDIA_TYPE_VIDEO ||
                     !ost->output_video_filter || avfilter_poll_frame(ost->output_video_filter->inputs[0]);
                 while (frame_available) {

From e844abc4983385567cfca1a638877e19f0f1cfeb Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 5 Jun 2011 13:55:02 +0200
Subject: [PATCH 600/830] sdl: align option fields after last commit

---
 libavdevice/sdl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavdevice/sdl.c b/libavdevice/sdl.c
index 36f65f2395..4b2566fc63 100644
--- a/libavdevice/sdl.c
+++ b/libavdevice/sdl.c
@@ -204,8 +204,8 @@ static int sdl_write_packet(AVFormatContext *s, AVPacket *pkt)
 
 static const AVOption options[] = {
     { "window_title", "SDL window title",           OFFSET(window_title),  FF_OPT_TYPE_STRING, {.str = NULL }, 0,  0, AV_OPT_FLAG_ENCODING_PARAM },
-    { "icon_title",   "SDL iconified window title", OFFSET(icon_title)  ,  FF_OPT_TYPE_STRING, {.str = NULL },               0,  0, AV_OPT_FLAG_ENCODING_PARAM },
-    { "window_size",  "SDL window forced size",     OFFSET(window_size) ,  FF_OPT_TYPE_STRING, {.str = NULL },               0,  0, AV_OPT_FLAG_ENCODING_PARAM },
+    { "icon_title",   "SDL iconified window title", OFFSET(icon_title)  ,  FF_OPT_TYPE_STRING, {.str = NULL }, 0,  0, AV_OPT_FLAG_ENCODING_PARAM },
+    { "window_size",  "SDL window forced size",     OFFSET(window_size) ,  FF_OPT_TYPE_STRING, {.str = NULL }, 0,  0, AV_OPT_FLAG_ENCODING_PARAM },
     { NULL },
 };
 

From b39b06233dfd69b941a32f29171dfb63abb23c06 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 5 Jun 2011 13:17:26 +0200
Subject: [PATCH 601/830] AVOptions: add av_opt_free convenience function.

---
 doc/APIchanges     | 3 +++
 libavutil/avutil.h | 2 +-
 libavutil/opt.c    | 8 ++++++++
 libavutil/opt.h    | 5 +++++
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index a55b15284b..77eb6d2d27 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-xx - xxxxxxx - lavu 51.3.0 - opt.h
+  Add av_opt_free convenience function.
+
 2011-05-28 - 0420bd7 - lavu 51.2.0 - pixdesc.h
   Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
   avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 8b8ca40c94..5085a6dd0b 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,7 +40,7 @@
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR  2
+#define LIBAVUTIL_VERSION_MINOR  3
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/opt.c b/libavutil/opt.c
index 4e25918ed1..172fcec456 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -520,6 +520,14 @@ int av_set_options_string(void *ctx, const char *opts,
     return count;
 }
 
+void av_opt_free(void *obj)
+{
+    const AVOption *o = NULL;
+    while ((o = av_next_option(obj, o)))
+        if (o->type == FF_OPT_TYPE_STRING || o->type == FF_OPT_TYPE_BINARY)
+            av_freep((uint8_t *)obj + o->offset);
+}
+
 #ifdef TEST
 
 #undef printf
diff --git a/libavutil/opt.h b/libavutil/opt.h
index 6668139fec..8c3b6c1c36 100644
--- a/libavutil/opt.h
+++ b/libavutil/opt.h
@@ -176,4 +176,9 @@ void av_opt_set_defaults2(void *s, int mask, int flags);
 int av_set_options_string(void *ctx, const char *opts,
                           const char *key_val_sep, const char *pairs_sep);
 
+/**
+ * Free all string and binary options in obj.
+ */
+void av_opt_free(void *obj);
+
 #endif /* AVUTIL_OPT_H */

From 367732832faaf1bac4ece37cf7fef8c911e16312 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 5 Jun 2011 13:18:17 +0200
Subject: [PATCH 602/830] lavf,lavc: free avoptions in a generic way.

It's simpler and less error-prone.

Fixes some memleaks along the way.
---
 libavcodec/utils.c      | 3 +++
 libavdevice/bktr.c      | 2 --
 libavdevice/fbdev.c     | 1 -
 libavdevice/libdc1394.c | 3 ---
 libavdevice/v4l2.c      | 4 ----
 libavdevice/vfwcap.c    | 3 ---
 libavdevice/x11grab.c   | 2 --
 libavformat/rawdec.c    | 4 ----
 libavformat/tty.c       | 1 -
 libavformat/utils.c     | 7 ++++++-
 10 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 65792a6f5a..2b417defbf 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -783,6 +783,9 @@ av_cold int avcodec_close(AVCodecContext *avctx)
         avctx->codec->close(avctx);
     avcodec_default_free_buffers(avctx);
     avctx->coded_frame = NULL;
+    if (avctx->codec->priv_class)
+        av_opt_free(avctx->priv_data);
+    av_opt_free(avctx);
     av_freep(&avctx->priv_data);
     if(avctx->codec && avctx->codec->encode)
         av_freep(&avctx->extradata);
diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index 6e19a61103..4d3933f4e9 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -320,8 +320,6 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     last_frame_time = 0;
 
 out:
-    av_freep(&s->video_size);
-    av_freep(&s->framerate);
     return ret;
 }
 
diff --git a/libavdevice/fbdev.c b/libavdevice/fbdev.c
index 7e9ffe5c77..afd6b94ed0 100644
--- a/libavdevice/fbdev.c
+++ b/libavdevice/fbdev.c
@@ -103,7 +103,6 @@ av_cold static int fbdev_read_header(AVFormatContext *avctx,
     int ret, flags = O_RDONLY;
 
     ret = av_parse_video_rate(&fbdev->fps, fbdev->framerate);
-    av_freep(&fbdev->framerate);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Couldn't parse framerate.\n");
         return ret;
diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index b17d0fb441..622579bc92 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -195,9 +195,6 @@ static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap,
     *select_fps = fps;
     *select_fmt = fmt;
 out:
-    av_freep(&dc1394->video_size);
-    av_freep(&dc1394->pixel_format);
-    av_freep(&dc1394->framerate);
     return ret;
 }
 
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 98ff82ec0d..839d290b63 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -684,10 +684,6 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     st->codec->bit_rate = s->frame_size * 1/av_q2d(st->codec->time_base) * 8;
 
 out:
-    av_freep(&s->video_size);
-    av_freep(&s->pixel_format);
-    av_freep(&s->standard);
-    av_freep(&s->framerate);
     return res;
 }
 
diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index 5dd873bdcb..95dd4c34b8 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c
@@ -234,9 +234,6 @@ static int vfw_read_close(AVFormatContext *s)
         pktl = next;
     }
 
-    av_freep(&ctx->video_size);
-    av_freep(&ctx->framerate);
-
     return 0;
 }
 
diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c
index b1ca6e699a..c6dc673520 100644
--- a/libavdevice/x11grab.c
+++ b/libavdevice/x11grab.c
@@ -258,8 +258,6 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     st->codec->bit_rate = x11grab->frame_size * 1/av_q2d(x11grab->time_base) * 8;
 
 out:
-    av_freep(&x11grab->video_size);
-    av_freep(&x11grab->framerate);
     return ret;
 }
 
diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index a92200d844..cc05c353e9 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -102,9 +102,6 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
             st->codec->height = height;
             st->codec->pix_fmt = pix_fmt;
 fail:
-            av_freep(&s1->video_size);
-            av_freep(&s1->pixel_format);
-            av_freep(&s1->framerate);
             return ret;
             }
         default:
@@ -182,7 +179,6 @@ int ff_raw_video_read_header(AVFormatContext *s,
     av_set_pts_info(st, 64, 1, 1200000);
 
 fail:
-    av_freep(&s1->framerate);
     return ret;
 }
 
diff --git a/libavformat/tty.c b/libavformat/tty.c
index 9dada16b8a..970274eb54 100644
--- a/libavformat/tty.c
+++ b/libavformat/tty.c
@@ -121,7 +121,6 @@ static int read_header(AVFormatContext *avctx,
     }
 
 fail:
-    av_freep(&s->video_size);
     return ret;
 }
 
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 1050959dcb..bdc20f6d9b 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2549,6 +2549,10 @@ void avformat_free_context(AVFormatContext *s)
     int i;
     AVStream *st;
 
+    av_opt_free(s);
+    if (s->iformat && s->iformat->priv_class)
+        av_opt_free(s->priv_data);
+
     for(i=0;i<s->nb_streams;i++) {
         /* free all data in a stream component */
         st = s->streams[i];
@@ -2578,7 +2582,6 @@ void avformat_free_context(AVFormatContext *s)
     }
     av_freep(&s->chapters);
     av_metadata_free(&s->metadata);
-    av_freep(&s->key);
     av_freep(&s->streams);
     av_free(s);
 }
@@ -3094,6 +3097,8 @@ fail:
         av_freep(&s->streams[i]->priv_data);
         av_freep(&s->streams[i]->index_entries);
     }
+    if (s->iformat && s->iformat->priv_class)
+        av_opt_free(s->priv_data);
     av_freep(&s->priv_data);
     return ret;
 }

From 29b3de127f332ef7dff863544730f0b4b0741a11 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 5 Jun 2011 02:00:21 +0200
Subject: [PATCH 603/830] Skip generated table headers during 'make
 checkheaders'.

---
 libavcodec/Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index ce96950301..6088d21307 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -651,8 +651,9 @@ OBJS-$(!CONFIG_SMALL)                  += inverse.o
 
 -include $(SUBDIR)$(ARCH)/Makefile
 
-SKIPHEADERS                            += %_tablegen.h aac_tablegen_decl.h \
-                                          fft-internal.h $(ARCH)/vp56_arith.h
+SKIPHEADERS                            += %_tablegen.h %_tables.h \
+                                          aac_tablegen_decl.h fft-internal.h \
+                                          $(ARCH)/vp56_arith.h
 SKIPHEADERS-$(CONFIG_DXVA2)            += dxva2.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_LIBDIRAC)         += libdirac.h
 SKIPHEADERS-$(CONFIG_LIBSCHROEDINGER)  += libschroedinger.h

From f25a2ece76756214da7dcde4a52a0534d9503319 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 5 Jun 2011 00:59:52 +0200
Subject: [PATCH 604/830] Drop explicit filenames from @file Doxygen tags.

The filename is unnecessary and brittle across file renames.
---
 libavcodec/lagarith.c    | 2 +-
 libavcodec/lagarithrac.c | 2 +-
 libavcodec/lagarithrac.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c
index 53f274da94..3d53536d13 100644
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c
@@ -20,7 +20,7 @@
  */
 
 /**
- * @file libavcodec/lagarith.c
+ * @file
  * Lagarith lossless decoder
  * @author Nathan Caldwell
  */
diff --git a/libavcodec/lagarithrac.c b/libavcodec/lagarithrac.c
index f1ffbf0a9b..ab7a60011d 100644
--- a/libavcodec/lagarithrac.c
+++ b/libavcodec/lagarithrac.c
@@ -21,7 +21,7 @@
  */
 
 /**
- * @file libavcodec/lagarithrac.c
+ * @file
  * Lagarith range decoder
  * @author Nathan Caldwell
  * @author David Conrad
diff --git a/libavcodec/lagarithrac.h b/libavcodec/lagarithrac.h
index 7d0d05c6ac..6a8fa95a8e 100644
--- a/libavcodec/lagarithrac.h
+++ b/libavcodec/lagarithrac.h
@@ -21,7 +21,7 @@
  */
 
 /**
- * @file libavcodec/lagarithrac.h
+ * @file
  * Lagarith range decoder
  * @author Nathan Caldwell
  * @author David Conrad

From fb8648ad4b94abd119ae75b174f578aaacbbfd55 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 5 Jun 2011 01:54:45 +0200
Subject: [PATCH 605/830] Remove unnecessary LIBAVFORMAT_BUILD #ifdef.

---
 libavutil/internal.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavutil/internal.h b/libavutil/internal.h
index 51e449d3fe..ee11a0a9d2 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -141,7 +141,6 @@
 #define strncpy strncpy_is_forbidden_due_to_security_issues_use_av_strlcpy
 #undef  exit
 #define exit exit_is_forbidden
-#ifndef LIBAVFORMAT_BUILD
 #undef  printf
 #define printf please_use_av_log_instead_of_printf
 #undef  fprintf
@@ -150,7 +149,6 @@
 #define puts please_use_av_log_instead_of_puts
 #undef  perror
 #define perror please_use_av_log_instead_of_perror
-#endif
 
 #define FF_ALLOC_OR_GOTO(ctx, p, size, label)\
 {\

From ef5d7e18f4be8f36afc1034369849bff980ad9b1 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 5 Jun 2011 01:59:14 +0200
Subject: [PATCH 606/830] Skip tableprint.h during 'make checkheaders'.

It is only used on the host and may not compile on the target.
---
 libavcodec/Makefile | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 6088d21307..b772bf185e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -651,8 +651,11 @@ OBJS-$(!CONFIG_SMALL)                  += inverse.o
 
 -include $(SUBDIR)$(ARCH)/Makefile
 
-SKIPHEADERS                            += %_tablegen.h %_tables.h \
-                                          aac_tablegen_decl.h fft-internal.h \
+SKIPHEADERS                            += %_tablegen.h                  \
+                                          %_tables.h                    \
+                                          aac_tablegen_decl.h           \
+                                          fft-internal.h                \
+                                          tableprint.h                  \
                                           $(ARCH)/vp56_arith.h
 SKIPHEADERS-$(CONFIG_DXVA2)            += dxva2.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_LIBDIRAC)         += libdirac.h

From 612d0782fc885aaa0bbb8f8966d425ea91a606cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Wed, 1 Jun 2011 20:44:08 +0200
Subject: [PATCH 607/830] Add const to
 avfilter_get_video_buffer_ref_from_arrays arguments.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoids warning about discarding qualifiers in avcodec.c

Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
---
 libavfilter/avfilter.c | 2 +-
 libavfilter/avfilter.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index 037d5864ae..fae3c6c5f2 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -365,7 +365,7 @@ AVFilterBufferRef *avfilter_get_video_buffer(AVFilterLink *link, int perms, int
 }
 
 AVFilterBufferRef *
-avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int perms,
+avfilter_get_video_buffer_ref_from_arrays(uint8_t * const data[4], const int linesize[4], int perms,
                                           int w, int h, enum PixelFormat format)
 {
     AVFilterBuffer *pic = av_mallocz(sizeof(AVFilterBuffer));
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index e8e2a8b9d1..4ff68cd180 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -674,7 +674,7 @@ AVFilterBufferRef *avfilter_get_video_buffer(AVFilterLink *link, int perms,
  * @param format the pixel format of the image specified by the data and linesize arrays
  */
 AVFilterBufferRef *
-avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int perms,
+avfilter_get_video_buffer_ref_from_arrays(uint8_t * const data[4], const int linesize[4], int perms,
                                           int w, int h, enum PixelFormat format);
 
 /**

From 2c6fb9f03204d25bee68f1175233c1fff8e77e65 Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Sun, 5 Jun 2011 14:20:56 -0500
Subject: [PATCH 608/830] v4l2: do not force NTSC as standard

Setting a standard is meaningful only for analog capture devices.
---
 libavdevice/v4l2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 839d290b63..2553bca48a 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -730,7 +730,7 @@ static int v4l2_read_close(AVFormatContext *s1)
 #define OFFSET(x) offsetof(struct video_data, x)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
-    { "standard", "", offsetof(struct video_data, standard), FF_OPT_TYPE_STRING, {.str = "NTSC" }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
+    { "standard", "", offsetof(struct video_data, standard), FF_OPT_TYPE_STRING, {.str = NULL }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
     { "channel",  "", offsetof(struct video_data, channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },

From a8d44f9dd5e2b0e65976c586a6185882d673317b Mon Sep 17 00:00:00 2001
From: Daniel Kang <daniel.d.kang@gmail.com>
Date: Sun, 5 Jun 2011 19:20:05 -0400
Subject: [PATCH 609/830] Add x86 assembly for some 10-bit H.264 intra predict
 functions.

Parts are inspired by the 8-bit H.264 predict code in Libav.
Other parts are ported from x264 with relicensing permission from the
author.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/x86/Makefile                 |   3 +-
 libavcodec/x86/h264_intrapred_10bit.asm | 337 ++++++++++++++++++++++++
 libavcodec/x86/h264_intrapred_init.c    |  61 ++++-
 3 files changed, 396 insertions(+), 5 deletions(-)
 create mode 100644 libavcodec/x86/h264_intrapred_10bit.asm

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 38b736e5e7..1c451c8352 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -16,7 +16,8 @@ YASM-OBJS-$(CONFIG_H264DSP)            += x86/h264_deblock.o            \
                                           x86/h264_idct_10bit.o         \
                                           x86/h264_weight.o             \
 
-YASM-OBJS-$(CONFIG_H264PRED)           += x86/h264_intrapred.o
+YASM-OBJS-$(CONFIG_H264PRED)           += x86/h264_intrapred.o          \
+                                          x86/h264_intrapred_10bit.o
 MMX-OBJS-$(CONFIG_H264PRED)            += x86/h264_intrapred_init.o
 
 YASM-OBJS-$(CONFIG_VC1_DECODER)        += x86/vc1dsp_yasm.o
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
new file mode 100644
index 0000000000..5cb593ac38
--- /dev/null
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -0,0 +1,337 @@
+;*****************************************************************************
+;* MMX/SSE2/AVX-optimized 10-bit H.264 intra prediction code
+;*****************************************************************************
+;* Copyright (C) 2005-2011 x264 project
+;*
+;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA
+
+SECTION .text
+
+cextern pw_4
+cextern pw_1
+
+%macro PRED4x4_LOWPASS 4
+    paddw       %2, %3
+    psrlw       %2, 1
+    pavgw       %1, %4, %2
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void pred4x4_down_right(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+%macro PRED4x4_DR 1
+cglobal pred4x4_down_right_10_%1, 3,3
+    sub       r0, r2
+    lea       r1, [r0+r2*2]
+    movhps    m1, [r1-8]
+    movhps    m2, [r0+r2*1-8]
+    movhps    m4, [r0-8]
+    punpckhwd m2, m4
+    movq      m3, [r0]
+    punpckhdq m1, m2
+    PALIGNR   m3, m1, 10, m1
+    mova      m1, m3
+    movhps    m4, [r1+r2*1-8]
+    PALIGNR   m3, m4, 14, m4
+    mova      m2, m3
+    movhps    m4, [r1+r2*2-8]
+    PALIGNR   m3, m4, 14, m4
+    PRED4x4_LOWPASS m0, m3, m1, m2
+    movq      [r1+r2*2], m0
+    psrldq    m0, 2
+    movq      [r1+r2*1], m0
+    psrldq    m0, 2
+    movq      [r0+r2*2], m0
+    psrldq    m0, 2
+    movq      [r0+r2*1], m0
+    RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED4x4_DR sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED4x4_DR ssse3
+%ifdef HAVE_AVX
+INIT_AVX
+PRED4x4_DR avx
+%endif
+
+;-----------------------------------------------------------------------------
+; void pred4x4_vertical_right(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+%macro PRED4x4_VR 1
+cglobal pred4x4_vertical_right_10_%1, 3,3,6
+    sub     r0, r2
+    lea     r1, [r0+r2*2]
+    movq    m5, [r0]            ; ........t3t2t1t0
+    movhps  m1, [r0-8]
+    PALIGNR m0, m5, m1, 14, m1  ; ......t3t2t1t0lt
+    pavgw   m5, m0
+    movhps  m1, [r0+r2*1-8]
+    PALIGNR m0, m1, 14, m1      ; ....t3t2t1t0ltl0
+    mova    m1, m0
+    movhps  m2, [r0+r2*2-8]
+    PALIGNR m0, m2, 14, m2      ; ..t3t2t1t0ltl0l1
+    mova    m2, m0
+    movhps  m3, [r1+r2*1-8]
+    PALIGNR m0, m3, 14, m3      ; t3t2t1t0ltl0l1l2
+    PRED4x4_LOWPASS m3, m1, m0, m2
+    pslldq  m1, m3, 12
+    psrldq  m3, 4
+    movq    [r0+r2*1], m5
+    movq    [r0+r2*2], m3
+    PALIGNR m5, m1, 14, m2
+    pslldq  m1, 2
+    movq    [r1+r2*1], m5
+    PALIGNR m3, m1, 14, m1
+    movq    [r1+r2*2], m3
+    RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED4x4_VR sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED4x4_VR ssse3
+%ifdef HAVE_AVX
+INIT_AVX
+PRED4x4_VR avx
+%endif
+
+;-----------------------------------------------------------------------------
+; void pred4x4_horizontal_down(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+%macro PRED4x4_HD 1
+cglobal pred4x4_horizontal_down_10_%1, 3,3
+    sub        r0, r2
+    lea        r1, [r0+r2*2]
+    movq       m0, [r0-8]      ; lt ..
+    movhps     m0, [r0]
+    pslldq     m0, 2           ; t2 t1 t0 lt .. .. .. ..
+    movq       m1, [r1+r2*2-8] ; l3
+    movq       m3, [r1+r2*1-8]
+    punpcklwd  m1, m3          ; l2 l3
+    movq       m2, [r0+r2*2-8] ; l1
+    movq       m3, [r0+r2*1-8]
+    punpcklwd  m2, m3          ; l0 l1
+    punpckhdq  m1, m2          ; l0 l1 l2 l3
+    punpckhqdq m1, m0          ; t2 t1 t0 lt l0 l1 l2 l3
+    psrldq     m0, m1, 4       ; .. .. t2 t1 t0 lt l0 l1
+    psrldq     m2, m1, 2       ; .. t2 t1 t0 lt l0 l1 l2
+    pavgw      m5, m1, m2
+    PRED4x4_LOWPASS m3, m1, m0, m2
+    punpcklwd  m5, m3
+    psrldq     m3, 8
+    PALIGNR    m3, m5, 12, m4
+    movq       [r1+r2*2], m5
+    movhps     [r0+r2*2], m5
+    psrldq     m5, 4
+    movq       [r1+r2*1], m5
+    movq       [r0+r2*1], m3
+    RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED4x4_HD sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED4x4_HD ssse3
+%ifdef HAVE_AVX
+INIT_AVX
+PRED4x4_HD avx
+%endif
+
+;-----------------------------------------------------------------------------
+; void pred4x4_dc(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+%macro HADDD 2 ; sum junk
+%if mmsize == 16
+    movhlps %2, %1
+    paddd   %1, %2
+    pshuflw %2, %1, 0xE
+    paddd   %1, %2
+%else
+    pshufw  %2, %1, 0xE
+    paddd   %1, %2
+%endif
+%endmacro
+
+%macro HADDW 2
+    pmaddwd %1, [pw_1]
+    HADDD   %1, %2
+%endmacro
+
+INIT_MMX
+cglobal pred4x4_dc_10_mmxext, 3,3
+    sub    r0, r2
+    lea    r1, [r0+r2*2]
+    movq   m2, [r0+r2*1-8]
+    paddw  m2, [r0+r2*2-8]
+    paddw  m2, [r1+r2*1-8]
+    paddw  m2, [r1+r2*2-8]
+    psrlq  m2, 48
+    movq   m0, [r0]
+    HADDW  m0, m1
+    paddw  m0, [pw_4]
+    paddw  m0, m2
+    psrlw  m0, 3
+    SPLATW m0, m0, 0
+    movq   [r0+r2*1], m0
+    movq   [r0+r2*2], m0
+    movq   [r1+r2*1], m0
+    movq   [r1+r2*2], m0
+    RET
+
+;-----------------------------------------------------------------------------
+; void pred4x4_down_left(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+;TODO: more AVX here
+%macro PRED4x4_DL 1
+cglobal pred4x4_down_left_10_%1, 3,3
+    sub        r0, r2
+    movq       m1, [r0]
+    movhps     m1, [r1]
+    pslldq     m5, m1, 2
+    pxor       m2, m5, m1
+    psrldq     m2, 2
+    pxor       m3, m1, m2
+    PRED4x4_LOWPASS m0, m5, m3, m1
+    lea        r1, [r0+r2*2]
+    movhps     [r1+r2*2], m0
+    psrldq     m0, 2
+    movq       [r0+r2*1], m0
+    psrldq     m0, 2
+    movq       [r0+r2*2], m0
+    psrldq     m0, 2
+    movq       [r1+r2*1], m0
+    RET
+%endmacro
+
+INIT_XMM
+PRED4x4_DL sse2
+%ifdef HAVE_AVX
+INIT_AVX
+PRED4x4_DL avx
+%endif
+
+;-----------------------------------------------------------------------------
+; void pred4x4_vertical_left(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+%macro PRED4x4_VL 1
+cglobal pred4x4_vertical_left_10_%1, 3,3
+    sub        r0, r2
+    movu       m1, [r0]
+    movhps     m1, [r1]
+    psrldq     m3, m1, 2
+    psrldq     m2, m1, 4
+    pavgw      m4, m3, m1
+    PRED4x4_LOWPASS m0, m1, m2, m3
+    lea        r1, [r0+r2*2]
+    movq       [r0+r2*1], m4
+    movq       [r0+r2*2], m0
+    psrldq     m4, 2
+    psrldq     m0, 2
+    movq       [r1+r2*1], m4
+    movq       [r1+r2*2], m0
+    RET
+%endmacro
+
+INIT_XMM
+PRED4x4_VL sse2
+%ifdef HAVE_AVX
+INIT_AVX
+PRED4x4_VL avx
+%endif
+
+;-----------------------------------------------------------------------------
+; void pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+INIT_MMX
+cglobal pred4x4_horizontal_up_10_mmxext, 3,3
+    sub       r0, r2
+    lea       r1, [r0+r2*2]
+    movq      m0, [r0+r2*1-8]
+    punpckhwd m0, [r0+r2*2-8]
+    movq      m1, [r1+r2*1-8]
+    punpckhwd m1, [r1+r2*2-8]
+    punpckhdq m0, m1
+    pshufw    m1, m1, 0xFF
+    movq      [r1+r2*2], m1
+    movd      [r1+r2*1+4], m1
+    pshufw    m2, m0, 11111001b
+    movq      m1, m2
+    pavgw     m2, m0
+
+    pshufw    m5, m0, 11111110b
+    PRED4x4_LOWPASS m3, m0, m5, m1
+    movq      m6, m2
+    punpcklwd m6, m3
+    movq      [r0+r2*1], m6
+    psrlq     m2, 16
+    psrlq     m3, 16
+    punpcklwd m2, m3
+    movq      [r0+r2*2], m2
+    psrlq     m2, 32
+    movd      [r1+r2*1], m2
+    RET
+
+
+
+;-----------------------------------------------------------------------------
+; void pred8x8_vertical(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+INIT_XMM
+cglobal pred8x8_vertical_10_sse2, 2,2
+    sub  r0, r1
+    mova m0, [r0]
+%rep 3
+    mova [r0+r1*1], m0
+    mova [r0+r1*2], m0
+    lea  r0, [r0+r1*2]
+%endrep
+    mova [r0+r1*1], m0
+    mova [r0+r1*2], m0
+    RET
+
+;-----------------------------------------------------------------------------
+; void pred8x8_horizontal(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+INIT_XMM
+cglobal pred8x8_horizontal_10_sse2, 2,3
+    mov          r2, 4
+.loop:
+    movq         m0, [r0+r1*0-8]
+    movq         m1, [r0+r1*1-8]
+    pshuflw      m0, m0, 0xff
+    pshuflw      m1, m1, 0xff
+    punpcklqdq   m0, m0
+    punpcklqdq   m1, m1
+    mova  [r0+r1*0], m0
+    mova  [r0+r1*1], m1
+    lea          r0, [r0+r1*2]
+    dec          r2
+    jg .loop
+    REP_RET
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index bd57030660..da5553571a 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -21,6 +21,31 @@
 #include "libavutil/cpu.h"
 #include "libavcodec/h264pred.h"
 
+#define PRED4x4(TYPE, DEPTH, OPT) \
+void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, const uint8_t *topright, int stride);
+
+PRED4x4(dc, 10, mmxext)
+PRED4x4(down_left, 10, sse2)
+PRED4x4(down_left, 10, avx)
+PRED4x4(down_right, 10, sse2)
+PRED4x4(down_right, 10, ssse3)
+PRED4x4(down_right, 10, avx)
+PRED4x4(vertical_left, 10, sse2)
+PRED4x4(vertical_left, 10, avx)
+PRED4x4(vertical_right, 10, sse2)
+PRED4x4(vertical_right, 10, ssse3)
+PRED4x4(vertical_right, 10, avx)
+PRED4x4(horizontal_up, 10, mmxext)
+PRED4x4(horizontal_down, 10, sse2)
+PRED4x4(horizontal_down, 10, ssse3)
+PRED4x4(horizontal_down, 10, avx)
+
+#define PRED8x8(TYPE, DEPTH, OPT) \
+void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride);
+
+PRED8x8(vertical, 10, sse2)
+PRED8x8(horizontal, 10, sse2)
+
 void ff_pred16x16_vertical_mmx     (uint8_t *src, int stride);
 void ff_pred16x16_vertical_sse     (uint8_t *src, int stride);
 void ff_pred16x16_horizontal_mmx   (uint8_t *src, int stride);
@@ -98,11 +123,8 @@ void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int s
 void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth)
 {
     int mm_flags = av_get_cpu_flags();
-    const int high_depth = bit_depth > 8;
-
-    if (high_depth)
-        return;
 
+    if (bit_depth == 8) {
 #if HAVE_YASM
     if (mm_flags & AV_CPU_FLAG_MMX) {
         h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_mmx;
@@ -226,4 +248,35 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
         }
     }
 #endif
+    } else if (bit_depth == 10) {
+#if HAVE_YASM
+        if (mm_flags & AV_CPU_FLAG_MMX2) {
+            h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
+            h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;
+        }
+        if (mm_flags & AV_CPU_FLAG_SSE2) {
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
+            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
+            h->pred4x4[VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_10_sse2;
+            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_sse2;
+            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_sse2;
+
+            h->pred8x8[VERT_PRED8x8        ] = ff_pred8x8_vertical_10_sse2;
+            h->pred8x8[HOR_PRED8x8         ] = ff_pred8x8_horizontal_10_sse2;
+        }
+        if (mm_flags & AV_CPU_FLAG_SSSE3) {
+            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
+            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_ssse3;
+            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_ssse3;
+        }
+#if HAVE_AVX
+        if (mm_flags&AV_CPU_FLAG_AVX) {
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
+            h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
+            h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_avx;
+            h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_avx;
+        }
+#endif /* HAVE_AVX */
+#endif /* HAVE_YASM */
+    }
 }

From eb7505e4295dca9c3ab8c01e055a458e07857ef0 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 4 Jun 2011 18:45:28 +0200
Subject: [PATCH 610/830] Remove some unused scripts from tools/.

---
 tools/build_avopt         |  9 ---------
 tools/clean-diff          | 11 -----------
 tools/jauche_sortierer.sh | 21 ---------------------
 tools/unwrap-diff         |  2 --
 4 files changed, 43 deletions(-)
 delete mode 100755 tools/build_avopt
 delete mode 100755 tools/clean-diff
 delete mode 100755 tools/jauche_sortierer.sh
 delete mode 100755 tools/unwrap-diff

diff --git a/tools/build_avopt b/tools/build_avopt
deleted file mode 100755
index fcf165765c..0000000000
--- a/tools/build_avopt
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-sed 's/unsigned//g' |\
- sed 's/enum//g' |\
- egrep '^ *(int|float|double|AVRational|char *\*) *[a-zA-Z_0-9]* *;' |\
- sed 's/^ *\([^ ]*\)[ *]*\([^;]*\);.*$/{"\2", NULL, OFFSET(\2), FF_OPT_TYPE_\U\1, DEFAULT, \1_MIN, \1_MAX},/' |\
- sed 's/AVRATIONAL_M/INT_M/g'|\
- sed 's/TYPE_AVRATIONAL/TYPE_RATIONAL/g'|\
- sed 's/FLOAT_M/FLT_M/g'|\
- sed 's/FF_OPT_TYPE_CHAR/FF_OPT_TYPE_STRING/g'
diff --git a/tools/clean-diff b/tools/clean-diff
deleted file mode 100755
index 4600702b10..0000000000
--- a/tools/clean-diff
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-sed '/^+[^+]/!s/	/TaBBaT/g' |\
- expand -t $(seq -s , 9 8 200) |\
- sed 's/TaBBaT/	/g' |\
- sed '/^+[^+]/s/ * $//' |\
- tr -d '\015' |\
- tr '\n' '�' |\
- sed 's/\(@@[^@]*@@�[^@]*\)/\n\1/g' |\
- egrep -v '@@[^@]*@@�(( [^�]*�)|([+-][[:space:]]*�)|(-[[:space:]]*([^�]*)�\+[[:space:]]*\5�))*$' |\
- tr -d '\n' |\
- tr '�' '\n'
diff --git a/tools/jauche_sortierer.sh b/tools/jauche_sortierer.sh
deleted file mode 100755
index 1f84f1a2a9..0000000000
--- a/tools/jauche_sortierer.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/sh
-#GPL
-#TODO
-#add pixelformat/sampleformat into the path of the codecs
-
-FFP=../ffprobe
-TMP=$(mktemp) || exit 1
-TARGET=$1
-shift
-
-for v do
-    BASE=$(basename $v)
-    echo $v | egrep -i '(public|private)' >/dev/null && echo Warning $v may be private
-    $FFP $v 2> $TMP
-    FORM=$((grep 'Input #0, ' -m1 $TMP || echo 'Input #0, unknown') | sed 's/Input #0, \([a-zA-Z0-9_]*\).*/\1/' )
-    mkdir -p $TARGET/container/$FORM
-    ln -s $v $TARGET/container/$FORM/$BASE
-    eval $(grep 'Stream #0\.[^:]*: [a-zA-Z0-9][^:]*: [a-zA-Z0-9]' $TMP | sed 's#[^:]*: \([a-zA-Z0-9]*\)[^:]*: \([a-zA-Z0-9]*\).*#mkdir -p '$TARGET'/\1/\2 ; ln -s '$v' '$TARGET'/\1/\2/'$BASE' ; #')
-done
-
-rm $TMP
diff --git a/tools/unwrap-diff b/tools/unwrap-diff
deleted file mode 100755
index ccea99b7b4..0000000000
--- a/tools/unwrap-diff
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-tr '\n' '\001' | sed 's/\x01\x01/\x01 \x01/g' | sed 's/\x01\([^-+ @]\)/ \1/g' | tr '\001' '\n'

From 27bcf55f459e038e81f09c17e72e6d44898b9015 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 2 Jun 2011 15:43:21 +0200
Subject: [PATCH 611/830] vsrc_buffer: add flags param to
 av_vsrc_buffer_add_video_buffer_ref

The new flags parameter specifies whether the video ref being added may
overwrite the one already cached. If AV_VSRC_BUF_FLAG_OVERWRITE is not
set and a ref is cached, vsrc_buffer logs an error and returns
AVERROR(EINVAL); if it is set, the cached video ref is unreferenced
before being replaced, thus avoiding a leak.
---
 doc/APIchanges            |  4 ++++
 ffmpeg.c                  |  3 ++-
 libavfilter/avcodec.h     |  5 ++++-
 libavfilter/avfilter.h    |  2 +-
 libavfilter/vsrc_buffer.c | 22 ++++++++++++++--------
 libavfilter/vsrc_buffer.h | 11 ++++++++++-
 6 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 937846ec62..a15e4a2e52 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-06 - xxxxxx - lavfi 2.13.0 - vsrc_buffer.h
+  Make av_vsrc_buffer_add_video_buffer_ref() accepts an additional
+  flags parameter in input.
+
 2011-06-03 - xxxxxx - lavfi 2.12.0 - avfilter_link_free()
   Add avfilter_link_free() function.
 
diff --git a/ffmpeg.c b/ffmpeg.c
index fb619e7777..b18224b039 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1691,7 +1691,8 @@ static int output_packet(AVInputStream *ist, int ist_index,
                          picture.sample_aspect_ratio = ist->st->sample_aspect_ratio;
                     picture.pts = ist->pts;
 
-                    av_vsrc_buffer_add_frame(ost->input_video_filter, &picture);
+                    av_vsrc_buffer_add_frame(ost->input_video_filter,
+                                             &picture, AV_VSRC_BUF_FLAG_OVERWRITE);
                 }
                 frame_available = ist->st->codec->codec_type != AVMEDIA_TYPE_VIDEO ||
                     !ost->output_video_filter || avfilter_poll_frame(ost->output_video_filter->inputs[0]);
diff --git a/libavfilter/avcodec.h b/libavfilter/avcodec.h
index 74434e819d..4eed6b2d2c 100644
--- a/libavfilter/avcodec.h
+++ b/libavfilter/avcodec.h
@@ -30,6 +30,7 @@
 
 #include "libavcodec/avcodec.h" // AVFrame
 #include "avfilter.h"
+#include "vsrc_buffer.h"
 
 /**
  * Copy the frame properties of src to dst, without copying the actual
@@ -49,9 +50,11 @@ AVFilterBufferRef *avfilter_get_video_buffer_ref_from_frame(const AVFrame *frame
  * Add frame data to buffer_src.
  *
  * @param buffer_src pointer to a buffer source context
+ * @param flags a combination of AV_VSRC_BUF_FLAG_* flags
  * @return >= 0 in case of success, a negative AVERROR code in case of
  * failure
  */
-int av_vsrc_buffer_add_frame(AVFilterContext *buffer_src, const AVFrame *frame);
+int av_vsrc_buffer_add_frame(AVFilterContext *buffer_src,
+                             const AVFrame *frame, int flags);
 
 #endif /* AVFILTER_AVCODEC_H */
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 4ff68cd180..c7612f2004 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR 12
+#define LIBAVFILTER_VERSION_MINOR 13
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vsrc_buffer.c b/libavfilter/vsrc_buffer.c
index 9ba7d4ee47..246444b3ac 100644
--- a/libavfilter/vsrc_buffer.c
+++ b/libavfilter/vsrc_buffer.c
@@ -37,18 +37,23 @@ typedef struct {
     char              sws_param[256];
 } BufferSourceContext;
 
-int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, AVFilterBufferRef *picref)
+int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter,
+                                        AVFilterBufferRef *picref, int flags)
 {
     BufferSourceContext *c = buffer_filter->priv;
     AVFilterLink *outlink = buffer_filter->outputs[0];
     int ret;
 
     if (c->picref) {
-        av_log(buffer_filter, AV_LOG_ERROR,
-               "Buffering several frames is not supported. "
-               "Please consume all available frames before adding a new one.\n"
-            );
-        //return -1;
+        if (flags & AV_VSRC_BUF_FLAG_OVERWRITE) {
+            avfilter_unref_buffer(c->picref);
+            c->picref = NULL;
+        } else {
+            av_log(buffer_filter, AV_LOG_ERROR,
+                   "Buffering several frames is not supported. "
+                   "Please consume all available frames before adding a new one.\n");
+            return AVERROR(EINVAL);
+        }
     }
 
     if (picref->video->w != c->w || picref->video->h != c->h || picref->format != c->pix_fmt) {
@@ -109,14 +114,15 @@ int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, AVFilter
 #if CONFIG_AVCODEC
 #include "avcodec.h"
 
-int av_vsrc_buffer_add_frame(AVFilterContext *buffer_src, const AVFrame *frame)
+int av_vsrc_buffer_add_frame(AVFilterContext *buffer_src,
+                             const AVFrame *frame, int flags)
 {
     int ret;
     AVFilterBufferRef *picref =
         avfilter_get_video_buffer_ref_from_frame(frame, AV_PERM_WRITE);
     if (!picref)
         return AVERROR(ENOMEM);
-    ret = av_vsrc_buffer_add_video_buffer_ref(buffer_src, picref);
+    ret = av_vsrc_buffer_add_video_buffer_ref(buffer_src, picref, flags);
     picref->buf->data[0] = NULL;
     avfilter_unref_buffer(picref);
 
diff --git a/libavfilter/vsrc_buffer.h b/libavfilter/vsrc_buffer.h
index c717f3dae4..b661d414ea 100644
--- a/libavfilter/vsrc_buffer.h
+++ b/libavfilter/vsrc_buffer.h
@@ -28,13 +28,22 @@
 
 #include "avfilter.h"
 
+/**
+ * Tell av_vsrc_buffer_add_video_buffer_ref() to overwrite the already
+ * cached video buffer with the new added one, otherwise the function
+ * will complain and exit.
+ */
+#define AV_VSRC_BUF_FLAG_OVERWRITE 1
+
 /**
  * Add video buffer data in picref to buffer_src.
  *
  * @param buffer_src pointer to a buffer source context
+ * @param flags a combination of AV_VSRC_BUF_FLAG_* flags
  * @return >= 0 in case of success, a negative AVERROR code in case of
  * failure
  */
-int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_src, AVFilterBufferRef *picref);
+int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_src,
+                                        AVFilterBufferRef *picref, int flags);
 
 #endif /* AVFILTER_VSRC_BUFFER_H */

From 6700aa8810d877cb017d977f12638481df459eb1 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 6 Jun 2011 01:50:27 +0200
Subject: [PATCH 612/830] lavf: remove reference to output-example in Makefile

output-example.c has been moved to doc/examples and renamed to
muxing-example.c.
---
 libavformat/Makefile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavformat/Makefile b/libavformat/Makefile
index 6d3a0276de..e5ec44bfb6 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -343,5 +343,3 @@ OBJS-$(CONFIG_JACK_INDEV)                += timefilter.o
 TESTPROGS = timefilter
 
 include $(SUBDIR)../subdir.mak
-
-$(SUBDIR)output-example$(EXESUF): ELIBS = -lswscale

From 580817df048fb114529cdb4a82885f551bf62c0c Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 6 Jun 2011 04:03:09 +0200
Subject: [PATCH 613/830] Move code for "ffmpeg: fix massive leak occurring
 when seeking" / e4841a404bdabfeafb917454d510b60d888cb761 elsewhere

The picture struct is written to in the loop, so this cannot work.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffmpeg.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index b18224b039..995c6c1689 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1656,6 +1656,22 @@ static int output_packet(AVInputStream *ist, int ist_index,
             avpkt.size = 0;
         }
 
+#if CONFIG_AVFILTER
+        if(ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO)
+        if (start_time == 0 || ist->pts >= start_time) {
+            for(i=0;i<nb_ostreams;i++) {
+                ost = ost_table[i];
+                if (ost->input_video_filter && ost->source_index == ist_index) {
+                    if (!picture.sample_aspect_ratio.num)
+                        picture.sample_aspect_ratio = ist->st->sample_aspect_ratio;
+                    picture.pts = ist->pts;
+
+                    av_vsrc_buffer_add_frame(ost->input_video_filter, &picture, AV_VSRC_BUF_FLAG_OVERWRITE);
+                }
+            }
+        }
+#endif
+
         // preprocess audio (volume)
         if (ist->st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
             if (audio_volume != 256) {
@@ -1686,14 +1702,6 @@ static int output_packet(AVInputStream *ist, int ist_index,
                 ost = ost_table[i];
                 if (ost->source_index == ist_index) {
 #if CONFIG_AVFILTER
-                if (ost->input_video_filter) {
-                    if (!picture.sample_aspect_ratio.num)
-                         picture.sample_aspect_ratio = ist->st->sample_aspect_ratio;
-                    picture.pts = ist->pts;
-
-                    av_vsrc_buffer_add_frame(ost->input_video_filter,
-                                             &picture, AV_VSRC_BUF_FLAG_OVERWRITE);
-                }
                 frame_available = ist->st->codec->codec_type != AVMEDIA_TYPE_VIDEO ||
                     !ost->output_video_filter || avfilter_poll_frame(ost->output_video_filter->inputs[0]);
                 while (frame_available) {

From c1dcbfddf9d8c484121824f876a1d8faee26d7fa Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 3 Jun 2011 20:58:01 +0200
Subject: [PATCH 614/830] tty: add framerate private option.

---
 libavformat/tty.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/libavformat/tty.c b/libavformat/tty.c
index 970274eb54..ecd3f58c77 100644
--- a/libavformat/tty.c
+++ b/libavformat/tty.c
@@ -37,6 +37,7 @@ typedef struct {
     int chars_per_frame;
     uint64_t fsize;  /**< file size less metadata buffer */
     char *video_size;/**< A string describing video size, set by a private option. */
+    char *framerate; /**< Set by a private option. */
 } TtyDemuxContext;
 
 /**
@@ -75,6 +76,7 @@ static int read_header(AVFormatContext *avctx,
     TtyDemuxContext *s = avctx->priv_data;
     int width = 0, height = 0, ret = 0;
     AVStream *st = av_new_stream(avctx, 0);
+    AVRational framerate;
 
     if (!st) {
         ret = AVERROR(ENOMEM);
@@ -88,20 +90,21 @@ static int read_header(AVFormatContext *avctx,
         av_log (avctx, AV_LOG_ERROR, "Couldn't parse video size.\n");
         goto fail;
     }
+    if ((ret = av_parse_video_rate(&framerate, s->framerate)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Could not parse framerate: %s.\n", s->framerate);
+        goto fail;
+    }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->width > 0)
         width = ap->width;
     if (ap->height > 0)
         height = ap->height;
+    if (ap->time_base.num)
+        framerate = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
     st->codec->width  = width;
     st->codec->height = height;
-
-    if (!ap->time_base.num) {
-        av_set_pts_info(st, 60, 1, 25);
-    } else {
-        av_set_pts_info(st, 60, ap->time_base.num, ap->time_base.den);
-    }
+    av_set_pts_info(st, 60, framerate.den, framerate.num);
 
     /* simulate tty display speed */
 #if FF_API_FORMAT_PARAMETERS
@@ -152,6 +155,7 @@ static int read_packet(AVFormatContext *avctx, AVPacket *pkt)
 static const AVOption options[] = {
     { "chars_per_frame", "", offsetof(TtyDemuxContext, chars_per_frame), FF_OPT_TYPE_INT, {.dbl = 6000}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM},
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "25"}, 0, 0, DEC },
     { NULL },
 };
 

From f33e2a51d9e704a48a7c72333d1d720633eed98a Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 4 Jun 2011 00:13:35 +0200
Subject: [PATCH 615/830] img2: add pixel_format private option.

---
 libavformat/img2.c | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/libavformat/img2.c b/libavformat/img2.c
index 1d50fbf567..fdd6a3ce82 100644
--- a/libavformat/img2.c
+++ b/libavformat/img2.c
@@ -22,18 +22,23 @@
 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/avstring.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
 #include "avformat.h"
 #include "avio_internal.h"
 #include "internal.h"
 #include <strings.h>
 
 typedef struct {
+    const AVClass *class;  /**< Class for private options. */
     int img_first;
     int img_last;
     int img_number;
     int img_count;
     int is_pipe;
     char path[1024];
+    char *pixel_format;     /**< Set by a private option. */
 } VideoData;
 
 typedef struct {
@@ -200,6 +205,7 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap)
     VideoData *s = s1->priv_data;
     int first_index, last_index;
     AVStream *st;
+    enum PixelFormat pix_fmt = PIX_FMT_NONE;
 
     s1->ctx_flags |= AVFMTCTX_NOHEADER;
 
@@ -208,6 +214,15 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap)
         return AVERROR(ENOMEM);
     }
 
+    if (s->pixel_format && (pix_fmt = av_get_pix_fmt(s->pixel_format)) == PIX_FMT_NONE) {
+        av_log(s1, AV_LOG_ERROR, "No such pixel format: %s.\n", s->pixel_format);
+        return AVERROR(EINVAL);
+    }
+#if FF_API_FORMAT_PARAMETERS
+    if (ap->pix_fmt != PIX_FMT_NONE)
+        pix_fmt = ap->pix_fmt;
+#endif
+
     av_strlcpy(s->path, s1->filename, sizeof(s->path));
     s->img_number = 0;
     s->img_count = 0;
@@ -252,8 +267,8 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap)
         st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
         st->codec->codec_id = av_str2id(img_tags, s->path);
     }
-    if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO && ap->pix_fmt != PIX_FMT_NONE)
-        st->codec->pix_fmt = ap->pix_fmt;
+    if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO && pix_fmt != PIX_FMT_NONE)
+        st->codec->pix_fmt = pix_fmt;
 
     return 0;
 }
@@ -421,6 +436,20 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
 
 #endif /* CONFIG_IMAGE2_MUXER || CONFIG_IMAGE2PIPE_MUXER */
 
+#define OFFSET(x) offsetof(VideoData, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+    { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { NULL },
+};
+
+static const AVClass img2_class = {
+    .class_name = "image2 demuxer",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 /* input */
 #if CONFIG_IMAGE2_DEMUXER
 AVInputFormat ff_image2_demuxer = {
@@ -431,6 +460,7 @@ AVInputFormat ff_image2_demuxer = {
     .read_header    = read_header,
     .read_packet    = read_packet,
     .flags          = AVFMT_NOFILE,
+    .priv_class     = &img2_class,
 };
 #endif
 #if CONFIG_IMAGE2PIPE_DEMUXER
@@ -440,6 +470,7 @@ AVInputFormat ff_image2pipe_demuxer = {
     .priv_data_size = sizeof(VideoData),
     .read_header    = read_header,
     .read_packet    = read_packet,
+    .priv_class     = &img2_class,
 };
 #endif
 

From a915bf64ccae3b341b7a416d509f7f807ab65777 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 4 Jun 2011 00:17:31 +0200
Subject: [PATCH 616/830] img2: add video_size private option.

---
 libavformat/img2.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/libavformat/img2.c b/libavformat/img2.c
index fdd6a3ce82..802ad6fbc8 100644
--- a/libavformat/img2.c
+++ b/libavformat/img2.c
@@ -25,6 +25,7 @@
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/parseutils.h"
 #include "avformat.h"
 #include "avio_internal.h"
 #include "internal.h"
@@ -39,6 +40,7 @@ typedef struct {
     int is_pipe;
     char path[1024];
     char *pixel_format;     /**< Set by a private option. */
+    char *video_size;       /**< Set by a private option. */
 } VideoData;
 
 typedef struct {
@@ -203,7 +205,8 @@ enum CodecID av_guess_image2_codec(const char *filename){
 static int read_header(AVFormatContext *s1, AVFormatParameters *ap)
 {
     VideoData *s = s1->priv_data;
-    int first_index, last_index;
+    int first_index, last_index, ret = 0;
+    int width = 0, height = 0;
     AVStream *st;
     enum PixelFormat pix_fmt = PIX_FMT_NONE;
 
@@ -218,9 +221,17 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap)
         av_log(s1, AV_LOG_ERROR, "No such pixel format: %s.\n", s->pixel_format);
         return AVERROR(EINVAL);
     }
+    if (s->video_size && (ret = av_parse_video_size(&width, &height, s->video_size)) < 0) {
+        av_log(s, AV_LOG_ERROR, "Could not parse video size: %s.\n", s->video_size);
+        return ret;
+    }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->pix_fmt != PIX_FMT_NONE)
         pix_fmt = ap->pix_fmt;
+    if (ap->width > 0)
+        width = ap->width;
+    if (ap->height > 0)
+        height = ap->height;
 #endif
 
     av_strlcpy(s->path, s1->filename, sizeof(s->path));
@@ -241,9 +252,9 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap)
         av_set_pts_info(st, 60, ap->time_base.num, ap->time_base.den);
     }
 
-    if(ap->width && ap->height){
-        st->codec->width = ap->width;
-        st->codec->height= ap->height;
+    if (width && height) {
+        st->codec->width  = width;
+        st->codec->height = height;
     }
 
     if (!s->is_pipe) {
@@ -440,6 +451,7 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "video_size",   "", OFFSET(video_size),   FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { NULL },
 };
 

From abcedfac60efa679e18a561bce0e93f8535b60e3 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 4 Jun 2011 00:21:26 +0200
Subject: [PATCH 617/830] img2: add framerate private option.

---
 libavformat/img2.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/libavformat/img2.c b/libavformat/img2.c
index 802ad6fbc8..4e82aa301b 100644
--- a/libavformat/img2.c
+++ b/libavformat/img2.c
@@ -41,6 +41,7 @@ typedef struct {
     char path[1024];
     char *pixel_format;     /**< Set by a private option. */
     char *video_size;       /**< Set by a private option. */
+    char *framerate;        /**< Set by a private option. */
 } VideoData;
 
 typedef struct {
@@ -209,6 +210,7 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap)
     int width = 0, height = 0;
     AVStream *st;
     enum PixelFormat pix_fmt = PIX_FMT_NONE;
+    AVRational framerate;
 
     s1->ctx_flags |= AVFMTCTX_NOHEADER;
 
@@ -225,6 +227,10 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap)
         av_log(s, AV_LOG_ERROR, "Could not parse video size: %s.\n", s->video_size);
         return ret;
     }
+    if ((ret = av_parse_video_rate(&framerate, s->framerate)) < 0) {
+        av_log(s, AV_LOG_ERROR, "Could not parse framerate: %s.\n", s->framerate);
+        return ret;
+    }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->pix_fmt != PIX_FMT_NONE)
         pix_fmt = ap->pix_fmt;
@@ -232,6 +238,8 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap)
         width = ap->width;
     if (ap->height > 0)
         height = ap->height;
+    if (ap->time_base.num)
+        framerate = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
 
     av_strlcpy(s->path, s1->filename, sizeof(s->path));
@@ -246,11 +254,7 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap)
         st->need_parsing = AVSTREAM_PARSE_FULL;
     }
 
-    if (!ap->time_base.num) {
-        av_set_pts_info(st, 60, 1, 25);
-    } else {
-        av_set_pts_info(st, 60, ap->time_base.num, ap->time_base.den);
-    }
+    av_set_pts_info(st, 60, framerate.den, framerate.num);
 
     if (width && height) {
         st->codec->width  = width;
@@ -452,6 +456,7 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
 static const AVOption options[] = {
     { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { "video_size",   "", OFFSET(video_size),   FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "framerate",    "", OFFSET(framerate),    FF_OPT_TYPE_STRING, {.str = "25"}, 0, 0, DEC },
     { NULL },
 };
 

From b2592ea42ca0d7c81b6e6ae90189d43e3e3fce59 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 4 Jun 2011 00:23:14 +0200
Subject: [PATCH 618/830] lavf: deprecate AVFormatParameters.time_base.

---
 libavformat/avformat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 27cd0f7763..5b67211959 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -227,8 +227,8 @@ typedef struct AVProbeData {
 #define AVPROBE_PADDING_SIZE 32             ///< extra allocated bytes at the end of the probe buffer
 
 typedef struct AVFormatParameters {
-    AVRational time_base;
 #if FF_API_FORMAT_PARAMETERS
+    attribute_deprecated AVRational time_base;
     attribute_deprecated int sample_rate;
     attribute_deprecated int channels;
     attribute_deprecated int width;

From e1c74148128ebed7c7bc9d36c776f24898267174 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 22 Mar 2011 13:29:28 +0100
Subject: [PATCH 619/830] samplefmt: change layout for arrays created by
 av_samples_alloc() and _fill_arrays()

The new layout is consistent with that of the av_image_() API and
simplifies understanding and copy operations; it also preserves
alignment information which was lost with the previous layout.

This breaks API/ABI, but since these functions were never referenced in
the code (and are unlikely to be already used by anyone), this
should not be a problem.
---
 doc/APIchanges        |  4 ++++
 libavutil/avutil.h    |  2 +-
 libavutil/samplefmt.c | 22 ++++++++++++----------
 libavutil/samplefmt.h |  9 +++++++--
 4 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 89104d0e41..6c32ed9674 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-06 - xxxxxx - lavu 51.5.0 - av_samples_*
+  Change the data layout created by av_samples_fill_arrays() and
+  av_samples_alloc().
+
 2011-06-06 - xxxxxx - lavfi 2.13.0 - vsrc_buffer.h
   Make av_vsrc_buffer_add_video_buffer_ref() accepts an additional
   flags parameter in input.
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index cdd4f7131c..fd5f293859 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,7 +40,7 @@
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR  4
+#define LIBAVUTIL_VERSION_MINOR  5
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/samplefmt.c b/libavutil/samplefmt.c
index ea004d926a..ca669da8b5 100644
--- a/libavutil/samplefmt.c
+++ b/libavutil/samplefmt.c
@@ -76,28 +76,30 @@ int av_samples_fill_arrays(uint8_t *pointers[8], int linesizes[8],
                            uint8_t *buf, int nb_channels, int nb_samples,
                            enum AVSampleFormat sample_fmt, int planar, int align)
 {
-    int i, step_size = 0;
+    int i, linesize;
     int sample_size = av_get_bits_per_sample_fmt(sample_fmt) >> 3;
-    int channel_step = planar ? FFALIGN(nb_samples*sample_size, align) : sample_size;
 
-    if(nb_channels * (uint64_t)nb_samples * sample_size >= INT_MAX - align*(uint64_t)nb_channels)
+    if (nb_channels * (uint64_t)nb_samples * sample_size >= INT_MAX - align*(uint64_t)nb_channels)
         return AVERROR(EINVAL);
+    linesize = planar ? FFALIGN(nb_samples*sample_size,             align) :
+                        FFALIGN(nb_samples*sample_size*nb_channels, align);
 
     if (pointers) {
         pointers[0] = buf;
-        for (i = 0; i < nb_channels; i++) {
-            pointers[i] = buf + step_size;
-            step_size += channel_step;
+        for (i = 1; planar && i < nb_channels; i++) {
+            pointers[i] = pointers[i-1] + linesize;
         }
-        memset(&pointers[nb_channels], 0, (8-nb_channels) * sizeof(pointers[0]));
+        memset(&pointers[i], 0, (8-i) * sizeof(pointers[0]));
     }
 
     if (linesizes) {
-        linesizes[0] = planar ?  sample_size : nb_channels*sample_size;
-        memset(&linesizes[1], 0, (8-1) * sizeof(linesizes[0]));
+        linesizes[0] = linesize;
+        for (i = 1; planar && i < nb_channels; i++)
+            linesizes[i] = linesizes[0];
+        memset(&linesizes[i], 0, (8-i) * sizeof(linesizes[0]));
     }
 
-    return planar ? channel_step * nb_channels : FFALIGN(nb_channels*sample_size*nb_samples, align);
+    return planar ? linesize * nb_channels : linesize;
 }
 
 int av_samples_alloc(uint8_t *pointers[8], int linesizes[8],
diff --git a/libavutil/samplefmt.h b/libavutil/samplefmt.h
index 9b9c0d49a9..a091721d96 100644
--- a/libavutil/samplefmt.h
+++ b/libavutil/samplefmt.h
@@ -74,8 +74,13 @@ int av_get_bits_per_sample_fmt(enum AVSampleFormat sample_fmt);
  * format sample_fmt.
  *
  * The pointers array is filled with the pointers to the samples data:
- * data[c] points to the first sample of channel c.
- * data[c] + linesize[0] points to the second sample of channel c
+ * for planar, set the start point of each plane's data within the buffer,
+ * for packed, set the start point of the entire buffer only.
+ *
+ * The linesize array is filled with the aligned size of each samples
+ * plane, that is linesize[i] will contain the linesize of the plane i,
+ * and will be zero for all the unused planes. All linesize values are
+ * equal.
  *
  * @param pointers array to be filled with the pointer for each plane, may be NULL
  * @param linesizes array to be filled with the linesize, may be NULL

From 0bc2cca12f671010eac171bc22049320a7cd61a3 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Thu, 2 Jun 2011 11:14:24 +0200
Subject: [PATCH 620/830] samplefmt: switch nb_channels/nb_samples params order
 in av_samples_alloc()

This is consistent with the order of parameters in
av_samples_fill_arrays().
---
 doc/APIchanges        | 4 ++++
 libavutil/avutil.h    | 2 +-
 libavutil/samplefmt.c | 2 +-
 libavutil/samplefmt.h | 3 ++-
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 6c32ed9674..16373e6933 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-06 - xxxxxx - lavu 51.6.0 - av_samples_alloc()
+  Switch nb_channels and nb_samples parameters order in
+  av_samples_alloc().
+
 2011-06-06 - xxxxxx - lavu 51.5.0 - av_samples_*
   Change the data layout created by av_samples_fill_arrays() and
   av_samples_alloc().
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index fd5f293859..1912d20170 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,7 +40,7 @@
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR  5
+#define LIBAVUTIL_VERSION_MINOR  6
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/samplefmt.c b/libavutil/samplefmt.c
index ca669da8b5..041aaeaa9f 100644
--- a/libavutil/samplefmt.c
+++ b/libavutil/samplefmt.c
@@ -103,7 +103,7 @@ int av_samples_fill_arrays(uint8_t *pointers[8], int linesizes[8],
 }
 
 int av_samples_alloc(uint8_t *pointers[8], int linesizes[8],
-                     int nb_samples, int nb_channels,
+                     int nb_channels, int nb_samples,
                      enum AVSampleFormat sample_fmt, int planar,
                      int align)
 {
diff --git a/libavutil/samplefmt.h b/libavutil/samplefmt.h
index a091721d96..30da046628 100644
--- a/libavutil/samplefmt.h
+++ b/libavutil/samplefmt.h
@@ -101,6 +101,7 @@ int av_samples_fill_arrays(uint8_t *pointers[8], int linesizes[8],
  * The allocated samples buffer has to be freed by using
  * av_freep(&pointers[0]).
  *
+ * @param nb_channels number of audio channels
  * @param nb_samples number of samples per channel
  * @param planar 1 if the samples layout is planar, 0 if packed,
  * @param align the value to use for buffer size alignment
@@ -109,7 +110,7 @@ int av_samples_fill_arrays(uint8_t *pointers[8], int linesizes[8],
  * @see av_samples_fill_arrays()
  */
 int av_samples_alloc(uint8_t *pointers[8], int linesizes[8],
-                     int nb_samples, int nb_channels,
+                     int nb_channels, int nb_samples,
                      enum AVSampleFormat sample_fmt, int planar,
                      int align);
 

From 95a0242642e8ee345f6545ea7f9b042989272729 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 1 Feb 2011 12:34:23 +0100
Subject: [PATCH 621/830] lavfi: prefer nb_samples over size in
 AVFilterBufferRefAudioProps

Remove AVFilterBufferRefAudioProps.size, and use nb_samples in
avfilter_get_audio_buffer() and avfilter_default_get_audio_buffer() in
place of size.

This is required as the size in the audio buffer may be aligned, so it
may not contain a well defined number of samples.
---
 doc/APIchanges         |  5 +++++
 libavfilter/avfilter.c | 15 ++++++++-------
 libavfilter/avfilter.h | 13 ++++++-------
 libavfilter/defaults.c | 11 +++++------
 4 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 16373e6933..bf34307f47 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,11 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-06 - xxxxxx - lavfi 2.14.0 - AVFilterBufferRefAudioProps
+  Remove AVFilterBufferRefAudioProps.size, and use nb_samples in
+  avfilter_get_audio_buffer() and avfilter_default_get_audio_buffer() in
+  place of size.
+
 2011-06-06 - xxxxxx - lavu 51.6.0 - av_samples_alloc()
   Switch nb_channels and nb_samples parameters order in
   av_samples_alloc().
diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index fae3c6c5f2..6d55350f7c 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -305,10 +305,9 @@ static void ff_dlog_ref(void *ctx, AVFilterBufferRef *ref, int end)
                 av_get_picture_type_char(ref->video->pict_type));
     }
     if (ref->audio) {
-        av_dlog(ctx, " cl:%"PRId64"d sn:%d s:%d sr:%d p:%d",
+        av_dlog(ctx, " cl:%"PRId64"d n:%d r:%d p:%d",
                 ref->audio->channel_layout,
                 ref->audio->nb_samples,
-                ref->audio->size,
                 ref->audio->sample_rate,
                 ref->audio->planar);
     }
@@ -405,16 +404,16 @@ fail:
 }
 
 AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms,
-                                             enum AVSampleFormat sample_fmt, int size,
+                                             enum AVSampleFormat sample_fmt, int nb_samples,
                                              int64_t channel_layout, int planar)
 {
     AVFilterBufferRef *ret = NULL;
 
     if (link->dstpad->get_audio_buffer)
-        ret = link->dstpad->get_audio_buffer(link, perms, sample_fmt, size, channel_layout, planar);
+        ret = link->dstpad->get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout, planar);
 
     if (!ret)
-        ret = avfilter_default_get_audio_buffer(link, perms, sample_fmt, size, channel_layout, planar);
+        ret = avfilter_default_get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout, planar);
 
     if (ret)
         ret->type = AVMEDIA_TYPE_AUDIO;
@@ -545,6 +544,7 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref)
 {
     void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *);
     AVFilterPad *dst = link->dstpad;
+    int i;
 
     FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1);
 
@@ -561,14 +561,15 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref)
 
         link->cur_buf = avfilter_default_get_audio_buffer(link, dst->min_perms,
                                                           samplesref->format,
-                                                          samplesref->audio->size,
+                                                          samplesref->audio->nb_samples,
                                                           samplesref->audio->channel_layout,
                                                           samplesref->audio->planar);
         link->cur_buf->pts                = samplesref->pts;
         link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate;
 
         /* Copy actual data into new samples buffer */
-        memcpy(link->cur_buf->data[0], samplesref->data[0], samplesref->audio->size);
+        for (i = 0; samplesref->data[i]; i++)
+            memcpy(link->cur_buf->data[i], samplesref->data[i], samplesref->linesize[0]);
 
         avfilter_unref_buffer(samplesref);
     } else
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index c7612f2004..541dbe7aa7 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR 13
+#define LIBAVFILTER_VERSION_MINOR 14
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
@@ -98,8 +98,7 @@ typedef struct AVFilterBuffer {
  */
 typedef struct AVFilterBufferRefAudioProps {
     int64_t channel_layout;     ///< channel layout of audio buffer
-    int nb_samples;             ///< number of audio samples
-    int size;                   ///< audio buffer size
+    int nb_samples;             ///< number of audio samples per channel
     uint32_t sample_rate;       ///< audio buffer sample rate
     int planar;                 ///< audio buffer - planar or packed
 } AVFilterBufferRefAudioProps;
@@ -372,7 +371,7 @@ struct AVFilterPad {
      * Input audio pads only.
      */
     AVFilterBufferRef *(*get_audio_buffer)(AVFilterLink *link, int perms,
-                                           enum AVSampleFormat sample_fmt, int size,
+                                           enum AVSampleFormat sample_fmt, int nb_samples,
                                            int64_t channel_layout, int planar);
 
     /**
@@ -461,7 +460,7 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link,
 
 /** default handler for get_audio_buffer() for audio inputs */
 AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms,
-                                                     enum AVSampleFormat sample_fmt, int size,
+                                                     enum AVSampleFormat sample_fmt, int nb_samples,
                                                      int64_t channel_layout, int planar);
 
 /**
@@ -684,14 +683,14 @@ avfilter_get_video_buffer_ref_from_arrays(uint8_t * const data[4], const int lin
  *                       be requested
  * @param perms          the required access permissions
  * @param sample_fmt     the format of each sample in the buffer to allocate
- * @param size           the buffer size in bytes
+ * @param nb_samples     the number of samples per channel
  * @param channel_layout the number and type of channels per sample in the buffer to allocate
  * @param planar         audio data layout - planar or packed
  * @return               A reference to the samples. This must be unreferenced with
  *                       avfilter_unref_buffer when you are finished with it.
  */
 AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms,
-                                             enum AVSampleFormat sample_fmt, int size,
+                                             enum AVSampleFormat sample_fmt, int nb_samples,
                                              int64_t channel_layout, int planar);
 
 /**
diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c
index a994f36079..996c0f0589 100644
--- a/libavfilter/defaults.c
+++ b/libavfilter/defaults.c
@@ -81,7 +81,7 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per
 }
 
 AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms,
-                                                     enum AVSampleFormat sample_fmt, int size,
+                                                     enum AVSampleFormat sample_fmt, int nb_samples,
                                                      int64_t channel_layout, int planar)
 {
     AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer));
@@ -100,7 +100,7 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per
         goto fail;
 
     ref->audio->channel_layout = channel_layout;
-    ref->audio->size           = size;
+    ref->audio->nb_samples     = nb_samples;
     ref->audio->planar         = planar;
 
     /* make sure the buffer gets read permission or it's useless for output */
@@ -112,8 +112,7 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per
     sample_size = av_get_bits_per_sample_fmt(sample_fmt) >>3;
     chans_nb = av_get_channel_layout_nb_channels(channel_layout);
 
-    per_channel_size = size/chans_nb;
-    ref->audio->nb_samples = per_channel_size/sample_size;
+    per_channel_size = nb_samples * sample_size;
 
     /* Set the number of bytes to traverse to reach next sample of a particular channel:
      * For planar, this is simply the sample size.
@@ -124,7 +123,7 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per
     memset(&samples->linesize[chans_nb], 0, (8-chans_nb) * sizeof(samples->linesize[0]));
 
     /* Calculate total buffer size, round to multiple of 16 to be SIMD friendly */
-    bufsize = (size + 15)&~15;
+    bufsize = (nb_samples * chans_nb * sample_size + 15)&~15;
     buf = av_malloc(bufsize);
     if (!buf)
         goto fail;
@@ -212,7 +211,7 @@ void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *sa
 
     if (outlink) {
         outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, samplesref->format,
-                                                             samplesref->audio->size,
+                                                             samplesref->audio->nb_samples,
                                                              samplesref->audio->channel_layout,
                                                              samplesref->audio->planar);
         outlink->out_buf->pts                = samplesref->pts;

From ef28c7b3a4f4513f7ea7c2f118eb806490d79546 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 15 Jan 2011 18:48:37 +0100
Subject: [PATCH 622/830] lavfi: use av_samples_alloc() in
 avfilter_default_get_audio_buffer()

---
 libavfilter/defaults.c | 38 ++++----------------------------------
 1 file changed, 4 insertions(+), 34 deletions(-)

diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c
index 996c0f0589..ce8f3cd0da 100644
--- a/libavfilter/defaults.c
+++ b/libavfilter/defaults.c
@@ -86,8 +86,7 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per
 {
     AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer));
     AVFilterBufferRef *ref = NULL;
-    int i, sample_size, chans_nb, bufsize, per_channel_size, step_size = 0;
-    char *buf;
+    int nb_channels = av_get_channel_layout_nb_channels(channel_layout);
 
     if (!samples || !(ref = av_mallocz(sizeof(AVFilterBufferRef))))
         goto fail;
@@ -109,41 +108,12 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per
     samples->refcount   = 1;
     samples->free       = ff_avfilter_default_free_buffer;
 
-    sample_size = av_get_bits_per_sample_fmt(sample_fmt) >>3;
-    chans_nb = av_get_channel_layout_nb_channels(channel_layout);
-
-    per_channel_size = nb_samples * sample_size;
-
-    /* Set the number of bytes to traverse to reach next sample of a particular channel:
-     * For planar, this is simply the sample size.
-     * For packed, this is the number of samples * sample_size.
-     */
-    for (i = 0; i < chans_nb; i++)
-        samples->linesize[i] = planar > 0 ? per_channel_size : sample_size;
-    memset(&samples->linesize[chans_nb], 0, (8-chans_nb) * sizeof(samples->linesize[0]));
-
     /* Calculate total buffer size, round to multiple of 16 to be SIMD friendly */
-    bufsize = (nb_samples * chans_nb * sample_size + 15)&~15;
-    buf = av_malloc(bufsize);
-    if (!buf)
+    if (av_samples_alloc(samples->data, samples->linesize,
+                         nb_channels, nb_samples, sample_fmt,
+                         planar, 16) < 0)
         goto fail;
 
-    /* For planar, set the start point of each channel's data within the buffer
-     * For packed, set the start point of the entire buffer only
-     */
-    samples->data[0] = buf;
-    if (buf && planar) {
-        for (i = 1; i < chans_nb; i++) {
-            step_size += per_channel_size;
-            samples->data[i] = buf + step_size;
-        }
-    } else {
-        for (i = 1; i < chans_nb; i++)
-            samples->data[i] = buf;
-    }
-
-    memset(&samples->data[chans_nb], 0, (8-chans_nb) * sizeof(samples->data[0]));
-
     memcpy(ref->data,     samples->data,     sizeof(ref->data));
     memcpy(ref->linesize, samples->linesize, sizeof(ref->linesize));
 

From 263f57c6d76b27e1895c4001f815824a463b1592 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 6 Jun 2011 11:34:45 +0200
Subject: [PATCH 623/830] pixdesc: remove duplicated header inclusion

---
 libavutil/pixdesc.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavutil/pixdesc.h b/libavutil/pixdesc.h
index 0d19212afa..f0de9981ec 100644
--- a/libavutil/pixdesc.h
+++ b/libavutil/pixdesc.h
@@ -22,7 +22,6 @@
 #ifndef AVUTIL_PIXDESC_H
 #define AVUTIL_PIXDESC_H
 
-#include "pixfmt.h"
 #include <inttypes.h>
 #include "pixfmt.h"
 

From dc435c4e9dfb10c81f177970a14e64a7b232ba54 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 6 Jun 2011 12:37:06 +0100
Subject: [PATCH 624/830] configure: remove --source-path option

This option does not work, and the implied functionality is
at best pointless.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 configure | 2 --
 1 file changed, 2 deletions(-)

diff --git a/configure b/configure
index fe5b700db5..d680337dce 100755
--- a/configure
+++ b/configure
@@ -191,7 +191,6 @@ External library support:
   --enable-zlib            enable zlib [autodetect]
 
 Advanced options (experts only):
-  --source-path=PATH       path to source code [$source_path]
   --cross-prefix=PREFIX    use PREFIX for compilation tools [$cross_prefix]
   --enable-cross-compile   assume a cross-compiler is used
   --sysroot=PATH           root of cross-build tree
@@ -1184,7 +1183,6 @@ CMDLINE_SET="
     nm
     pkg_config
     samples
-    source_path
     sysinclude
     sysroot
     target_exec

From 8477f2deefbebe7dab9dce4c9ed5f3bc8030206d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 6 Jun 2011 12:56:26 +0100
Subject: [PATCH 625/830] configure: simplify source_path setup

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 configure | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index d680337dce..34c1ed6d25 100755
--- a/configure
+++ b/configure
@@ -1684,13 +1684,12 @@ DEPEND_CMD='$(DEPCC) $(DEPFLAGS) $< | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(*F)
 DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -MM'
 
 # find source path
-source_path="$(dirname "$0")"
-enable source_path_used
 if test -f configure; then
     source_path="$(pwd)"
     disable source_path_used
 else
-    source_path="$(cd "$source_path"; pwd)"
+    source_path=$(cd $(dirname "$0"); pwd)
+    enable source_path_used
     echo "$source_path" | grep -q '[[:blank:]]' &&
         die "Out of tree builds are impossible with whitespace in source path."
     test -e "$source_path/config.h" &&

From ceff045dbecb63bbe42da6d7d33f614ae67fbebd Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Mon, 6 Jun 2011 09:27:54 -0400
Subject: [PATCH 626/830] utils.c: fix crash with threading enabled.

---
 libavcodec/utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 2b417defbf..1e5886473d 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -783,7 +783,7 @@ av_cold int avcodec_close(AVCodecContext *avctx)
         avctx->codec->close(avctx);
     avcodec_default_free_buffers(avctx);
     avctx->coded_frame = NULL;
-    if (avctx->codec->priv_class)
+    if (avctx->codec && avctx->codec->priv_class)
         av_opt_free(avctx->priv_data);
     av_opt_free(avctx);
     av_freep(&avctx->priv_data);

From 5eaba041a07a667eabd9a0784ead94e8d83c51fc Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 5 Jun 2011 22:16:16 +0100
Subject: [PATCH 627/830] build: do not add CFLAGS-yes to CFLAGS

CFLAGS-yes is never set so this serves no purpose.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 common.mak | 1 -
 1 file changed, 1 deletion(-)

diff --git a/common.mak b/common.mak
index 20876c0951..5511a3016d 100644
--- a/common.mak
+++ b/common.mak
@@ -73,7 +73,6 @@ endif
 
 OBJS-$(HAVE_MMX) +=  $(MMX-OBJS-yes)
 
-CFLAGS    += $(CFLAGS-yes)
 OBJS      += $(OBJS-yes)
 FFLIBS    := $(FFLIBS-yes) $(FFLIBS)
 TESTPROGS += $(TESTPROGS-yes)

From 171ae1eb0da284dec03c996ac68920823a71644d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 6 Jun 2011 13:21:05 +0100
Subject: [PATCH 628/830] build: remove stale dependency

This dependency is implicitly covered elsewhere.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 common.mak | 2 --
 1 file changed, 2 deletions(-)

diff --git a/common.mak b/common.mak
index 5511a3016d..5195c9911d 100644
--- a/common.mak
+++ b/common.mak
@@ -45,8 +45,6 @@ HOSTCFLAGS += $(IFLAGS)
 %.ho: %.h
 	$(CC) $(CPPFLAGS) $(CFLAGS) -Wno-unused -c -o $@ -x c $<
 
-%$(EXESUF): %.c
-
 %.ver: %.v
 	$(Q)sed 's/$$MAJOR/$($(basename $(@F))_VERSION_MAJOR)/' $^ > $@
 

From 0e28e9ca8f0025c34c3c6df8bf699a9a2db43abe Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Mon, 6 Jun 2011 09:13:05 -0500
Subject: [PATCH 629/830] flvenc: propagate error properly

avio_flush can fail, in particular when used with the rtmp/librtmp
protocol.
---
 libavformat/flvenc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
index b8ae113905..b20a3f80c5 100644
--- a/libavformat/flvenc.c
+++ b/libavformat/flvenc.c
@@ -434,7 +434,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
 
     av_free(data);
 
-    return 0;
+    return pb->error;
 }
 
 AVOutputFormat ff_flv_muxer = {

From d7a72d250b0deeaed68798f23476be30b28af064 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 5 Jun 2011 21:52:32 +0100
Subject: [PATCH 630/830] build: move all (un)install* target aliases to
 toplevel Makefile

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile   | 6 ++++--
 common.mak | 5 -----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index f626332482..0fb217a3ab 100644
--- a/Makefile
+++ b/Makefile
@@ -128,7 +128,9 @@ doc/%.1: TAG = MAN
 doc/%.1: doc/%.pod
 	$(M)pod2man --section=1 --center=" " --release=" " $< > $@
 
-install: $(INSTALL_TARGETS-yes)
+install: install-libs install-headers $(INSTALL_TARGETS-yes)
+
+install-libs: install-libs-yes
 
 install-progs: $(PROGS) $(INSTALL_PROGS_TARGETS-yes)
 	$(Q)mkdir -p "$(BINDIR)"
@@ -142,7 +144,7 @@ install-man: $(MANPAGES)
 	$(Q)mkdir -p "$(MANDIR)/man1"
 	$(INSTALL) -m 644 $(MANPAGES) "$(MANDIR)/man1"
 
-uninstall: uninstall-progs uninstall-data uninstall-man
+uninstall: uninstall-libs uninstall-headers uninstall-progs uninstall-data uninstall-man
 
 uninstall-progs:
 	$(RM) $(addprefix "$(BINDIR)/", $(ALLPROGS))
diff --git a/common.mak b/common.mak
index 5195c9911d..d50d8234e9 100644
--- a/common.mak
+++ b/common.mak
@@ -54,11 +54,6 @@ HOSTCFLAGS += $(IFLAGS)
 %.h:
 	@:
 
-install: install-libs install-headers
-install-libs: install-libs-yes
-
-uninstall: uninstall-libs uninstall-headers
-
 .PHONY: all depend dep *clean install* uninstall* examples testprogs
 
 # Disable suffix rules.  Most of the builtin rules are suffix rules,

From 0018b7f04378a0ff83c6c6d097fc6bdc97212970 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 5 Jun 2011 21:57:03 +0100
Subject: [PATCH 631/830] build: clean up .PHONY lists

This removes nonexistent targets from the .PHONY lists and
consolidates the lists in one place.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile   | 3 ++-
 common.mak | 2 --
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 0fb217a3ab..708f5e5134 100644
--- a/Makefile
+++ b/Makefile
@@ -289,4 +289,5 @@ $(FATE): ffmpeg$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
 fate-list:
 	@printf '%s\n' $(sort $(FATE))
 
-.PHONY: documentation *test regtest-* alltools check config
+.PHONY: all alltools *clean check config documentation examples install*
+.PHONY: *test testprogs uninstall*
diff --git a/common.mak b/common.mak
index d50d8234e9..3fbf1a7629 100644
--- a/common.mak
+++ b/common.mak
@@ -54,8 +54,6 @@ HOSTCFLAGS += $(IFLAGS)
 %.h:
 	@:
 
-.PHONY: all depend dep *clean install* uninstall* examples testprogs
-
 # Disable suffix rules.  Most of the builtin rules are suffix rules,
 # so this saves some time on slow systems.
 .SUFFIXES:

From 21c65125424ef3dd7e276dea14f8e8ef3292e388 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 5 Jun 2011 13:44:28 +0100
Subject: [PATCH 632/830] ARM: remove MUL64 and MAC64 inline asm

Current GCC versions know how to generate these instructions
properly and avoiding inline asm gives better code.  The MULH
function for ARMv5 uses the same instruction and is also not
needed any more.

The MLS64 macro remains since negating an input would normally
not be allowed as it would fail for INT_MIN.  In our uses, the
inputs never have this value and thus negating is safe.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/mathops.h | 31 +------------------------------
 1 file changed, 1 insertion(+), 30 deletions(-)

diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index 3870fce3e2..b27b18f871 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -28,45 +28,16 @@
 
 #if HAVE_INLINE_ASM
 
-#define MULH MULH
-#define MUL64 MUL64
-
 #if HAVE_ARMV6
+#define MULH MULH
 static inline av_const int MULH(int a, int b)
 {
     int r;
     __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
     return r;
 }
-
-static inline av_const int64_t MUL64(int a, int b)
-{
-    int64_t x;
-    __asm__ ("smull %Q0, %R0, %1, %2" : "=r"(x) : "r"(a), "r"(b));
-    return x;
-}
-#else
-static inline av_const int MULH(int a, int b)
-{
-    int lo, hi;
-    __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));
-    return hi;
-}
-
-static inline av_const int64_t MUL64(int a, int b)
-{
-    int64_t x;
-    __asm__ ("smull %Q0, %R0, %1, %2" : "=&r"(x) : "r"(a), "r"(b));
-    return x;
-}
 #endif
 
-static inline av_const int64_t MAC64(int64_t d, int a, int b)
-{
-    __asm__ ("smlal %Q0, %R0, %1, %2" : "+r"(d) : "r"(a), "r"(b));
-    return d;
-}
-#define MAC64(d, a, b) ((d) = MAC64(d, a, b))
 #define MLS64(d, a, b) MAC64(d, -(a), b)
 
 #if HAVE_ARMV5TE

From 4f8da7e7dc9244342da6c97ca489a8e6f712f3b4 Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Sat, 4 Jun 2011 14:36:30 +0200
Subject: [PATCH 633/830] s302m: fix resampling for 16 and 24bits.

---
 libavcodec/s302m.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/s302m.c b/libavcodec/s302m.c
index 9d623efa8f..2e261b612e 100644
--- a/libavcodec/s302m.c
+++ b/libavcodec/s302m.c
@@ -97,7 +97,7 @@ static int s302m_decode_frame(AVCodecContext *avctx, void *data,
             *o++ = (av_reverse[buf[6] & 0xf0] << 28) |
                    (av_reverse[buf[5]]        << 20) |
                    (av_reverse[buf[4]]        << 12) |
-                   (av_reverse[buf[3] & 0x0f] <<  8);
+                   (av_reverse[buf[3] & 0x0f] <<  4);
             buf += 7;
         }
         *data_size = (uint8_t*) o - (uint8_t*) data;
@@ -120,7 +120,7 @@ static int s302m_decode_frame(AVCodecContext *avctx, void *data,
                     av_reverse[buf[0]];
             *o++ = (av_reverse[buf[4] & 0xf0] << 12) |
                    (av_reverse[buf[3]]        <<  4) |
-                    av_reverse[buf[2] & 0x0f];
+                   (av_reverse[buf[2]]        >>  4);
             buf += 5;
         }
         *data_size = (uint8_t*) o - (uint8_t*) data;

From d93d7349ceb963150100d3c1b8649d234e396616 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 6 Jun 2011 19:57:10 +0100
Subject: [PATCH 634/830] build: rearrange some lines in a more logical way

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 708f5e5134..f745a939bc 100644
--- a/Makefile
+++ b/Makefile
@@ -74,12 +74,15 @@ endef
 
 $(foreach D,$(FFLIBS),$(eval $(call DOSUBDIR,lib$(D))))
 
+ffplay.o: CFLAGS += $(SDL_CFLAGS)
 ffplay$(EXESUF): FF_EXTRALIBS += $(SDL_LIBS)
 ffserver$(EXESUF): FF_LDFLAGS += $(FFSERVERLDFLAGS)
 
 %$(EXESUF): %.o cmdutils.o $(FF_DEP_LIBS)
 	$(LD) $(FF_LDFLAGS) -o $@ $< cmdutils.o $(FF_EXTRALIBS)
 
+alltools: $(TOOLS)
+
 tools/%$(EXESUF): tools/%.o
 	$(LD) $(FF_LDFLAGS) -o $@ $< $(FF_EXTRALIBS)
 
@@ -89,8 +92,6 @@ tools/%.o: tools/%.c
 -include $(wildcard tools/*.d)
 -include $(wildcard tests/*.d)
 
-ffplay.o: CFLAGS += $(SDL_CFLAGS)
-
 VERSION_SH  = $(SRC_PATH_BARE)/version.sh
 GIT_LOG     = $(SRC_PATH_BARE)/.git/logs/HEAD
 
@@ -104,8 +105,6 @@ version.h .version:
 # force version.sh to run whenever version might have changed
 -include .version
 
-alltools: $(TOOLS)
-
 DOCS = $(addprefix doc/, developer.html faq.html general.html libavfilter.html) $(HTMLPAGES) $(MANPAGES) $(PODPAGES)
 
 documentation: $(DOCS)

From d58ed64a8953d07f33259317a2ea5856d0c91dfd Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 6 Jun 2011 15:00:25 +0200
Subject: [PATCH 635/830] eval: add support for pow() function

It is a more search-friendly alternative to the ^ operator.
---
 doc/eval.texi      | 4 ++++
 libavutil/avutil.h | 2 +-
 libavutil/eval.c   | 5 +++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/doc/eval.texi b/doc/eval.texi
index 25c0ea6524..ac27750851 100644
--- a/doc/eval.texi
+++ b/doc/eval.texi
@@ -79,6 +79,10 @@ Compute the square root of @var{expr}. This is equivalent to
 
 @item not(expr)
 Return 1.0 if @var{expr} is zero, 0.0 otherwise.
+
+@item pow(x, y)
+Compute the power of @var{x} elevated @var{y}, it is equivalent to
+"(@var{x})^(@var{y})".
 @end table
 
 Note that:
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 1912d20170..bd3ff706ea 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -41,7 +41,7 @@
 
 #define LIBAVUTIL_VERSION_MAJOR 51
 #define LIBAVUTIL_VERSION_MINOR  6
-#define LIBAVUTIL_VERSION_MICRO  0
+#define LIBAVUTIL_VERSION_MICRO  1
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \
diff --git a/libavutil/eval.c b/libavutil/eval.c
index 331221515e..5ae90f06d1 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c
@@ -290,6 +290,7 @@ static int parse_primary(AVExpr **e, Parser *p)
     else if (strmatch(next, "trunc" )) d->type = e_trunc;
     else if (strmatch(next, "sqrt"  )) d->type = e_sqrt;
     else if (strmatch(next, "not"   )) d->type = e_not;
+    else if (strmatch(next, "pow"   )) d->type = e_pow;
     else {
         for (i=0; p->func1_names && p->func1_names[i]; i++) {
             if (strmatch(next, p->func1_names[i])) {
@@ -643,6 +644,10 @@ int main(void)
         "not(1)",
         "not(NAN)",
         "not(0)",
+        "pow(0,1.23)",
+        "pow(PI,1.23)",
+        "PI^1.23",
+        "pow(-1,1.23)",
         NULL
     };
 

From 6d170962bd611d6336362d5da067d5a757d627f7 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 6 Jun 2011 23:00:26 +0100
Subject: [PATCH 636/830] build: make rule for linking ff* apply only to these
 targets

This ensures that the special link command using cmdutils.o
only applies to the targets it should.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f745a939bc..db1a9bd150 100644
--- a/Makefile
+++ b/Makefile
@@ -78,7 +78,7 @@ ffplay.o: CFLAGS += $(SDL_CFLAGS)
 ffplay$(EXESUF): FF_EXTRALIBS += $(SDL_LIBS)
 ffserver$(EXESUF): FF_LDFLAGS += $(FFSERVERLDFLAGS)
 
-%$(EXESUF): %.o cmdutils.o $(FF_DEP_LIBS)
+$(PROGS): %$(EXESUF): %.o cmdutils.o $(FF_DEP_LIBS)
 	$(LD) $(FF_LDFLAGS) -o $@ $< cmdutils.o $(FF_EXTRALIBS)
 
 alltools: $(TOOLS)

From b9c6c7cb25932b594fd684a0cb553e439d49fe12 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 6 Jun 2011 21:46:18 +0100
Subject: [PATCH 637/830] build: remove empty $(OBJS) target

This target was added to prevent some files being deleted
by make when using chains of implicit rules.  This is no
longer required.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 common.mak | 2 --
 1 file changed, 2 deletions(-)

diff --git a/common.mak b/common.mak
index 3fbf1a7629..b5ccadbe6e 100644
--- a/common.mak
+++ b/common.mak
@@ -58,8 +58,6 @@ HOSTCFLAGS += $(IFLAGS)
 # so this saves some time on slow systems.
 .SUFFIXES:
 
-# Do not delete intermediate files from chains of implicit rules
-$(OBJS):
 endif
 
 OBJS-$(HAVE_MMX) +=  $(MMX-OBJS-yes)

From a39bd458a0df295df59242a8e18e1e5313bd3318 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 6 Jun 2011 22:59:23 +0200
Subject: [PATCH 638/830] mem: Try to work around posix_memalign() bug on OSX

This patch is based on a patch by John Stebbins

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavutil/mem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavutil/mem.c b/libavutil/mem.c
index 29ecbfa055..87c2008a27 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -86,6 +86,7 @@ void *av_malloc(size_t size)
     ptr = (char*)ptr + diff;
     ((char*)ptr)[-1]= diff;
 #elif HAVE_POSIX_MEMALIGN
+    if (size) //OSX on SDK 10.6 has a broken posix_memalign implementation
     if (posix_memalign(&ptr,ALIGN,size))
         ptr = NULL;
 #elif HAVE_MEMALIGN

From 3a1aaf7b21c7bde9f4c3d5342baa64d3e940d05f Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 7 Jun 2011 03:14:28 +0200
Subject: [PATCH 639/830] ffplay: Fix -vismv

Fixes Ticket164

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffplay.c               | 2 +-
 libavcodec/mpegvideo.c | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/ffplay.c b/ffplay.c
index 3ebcd59c1c..77b1e4e24c 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -1622,7 +1622,7 @@ static int input_request_frame(AVFilterLink *link)
     if (ret < 0)
         return -1;
 
-    if(priv->use_dr1) {
+    if(priv->use_dr1 && priv->frame->opaque) {
         picref = avfilter_ref_buffer(priv->frame->opaque, ~0);
     } else {
         picref = avfilter_get_video_buffer(link, AV_PERM_WRITE, link->w, link->h);
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index c599d1df3b..9397c657ad 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1410,6 +1410,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
             pict->data[i]= s->visualization_buffer[i];
         }
         pict->type= FF_BUFFER_TYPE_COPY;
+        pict->opaque= NULL;
         ptr= pict->data[0];
         block_height = 16>>v_chroma_shift;
 

From 40da61eff5acaaaec7d1247f7be4bb10fca30c1c Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 7 Jun 2011 04:43:03 +0200
Subject: [PATCH 640/830] Makefile: critical build fix after the merge. make
 fate passed locally due to ffmpeg/ffmpeg_g being there from before

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 2ac30ef5cd..cb3e60e7d8 100644
--- a/Makefile
+++ b/Makefile
@@ -84,7 +84,7 @@ ffplay.o: CFLAGS += $(SDL_CFLAGS)
 ffplay_g$(EXESUF): FF_EXTRALIBS += $(SDL_LIBS)
 ffserver_g$(EXESUF): FF_LDFLAGS += $(FFSERVERLDFLAGS)
 
-$(PROGS): %_g$(EXESUF): %.o cmdutils.o $(FF_DEP_LIBS)
+%_g$(EXESUF): %.o cmdutils.o $(FF_DEP_LIBS)
 	$(LD) $(FF_LDFLAGS) -o $@ $< cmdutils.o $(FF_EXTRALIBS)
 
 alltools: $(TOOLS)

From dc25d79f4980fb547908f4dd43732ebc0622610c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 5 Jun 2011 01:28:22 +0200
Subject: [PATCH 641/830] Drop unnecessary directory prefixes from #include
 directives.

---
 libavcodec/aac_tablegen.h | 2 +-
 libavcodec/dnxhdenc.h     | 5 +++--
 libavcodec/mlpdsp.c       | 2 +-
 libavcodec/v210enc.c      | 2 +-
 libavformat/mmsh.c        | 2 +-
 libavutil/audioconvert.c  | 4 ++--
 libavutil/eval.c          | 2 +-
 libavutil/imgutils.c      | 2 +-
 libavutil/imgutils.h      | 2 +-
 libavutil/parseutils.c    | 9 +++++----
 libavutil/parseutils.h    | 2 +-
 11 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/libavcodec/aac_tablegen.h b/libavcodec/aac_tablegen.h
index 4486e1a9fc..8773d9b975 100644
--- a/libavcodec/aac_tablegen.h
+++ b/libavcodec/aac_tablegen.h
@@ -29,7 +29,7 @@
 #include "libavcodec/aac_tables.h"
 #else
 #include "libavutil/mathematics.h"
-#include "libavcodec/aac.h"
+#include "aac.h"
 float ff_aac_pow2sf_tab[428];
 
 void ff_aac_tableinit(void)
diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h
index 43f65e46dd..bb24540a9f 100644
--- a/libavcodec/dnxhdenc.h
+++ b/libavcodec/dnxhdenc.h
@@ -25,8 +25,9 @@
 #define AVCODEC_DNXHDENC_H
 
 #include <stdint.h>
-#include "libavcodec/mpegvideo.h"
-#include "libavcodec/dnxhddata.h"
+
+#include "mpegvideo.h"
+#include "dnxhddata.h"
 
 typedef struct {
     uint16_t mb;
diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
index 0053df88d8..7d01c7586d 100644
--- a/libavcodec/mlpdsp.c
+++ b/libavcodec/mlpdsp.c
@@ -19,8 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavcodec/mlp.h"
 #include "dsputil.h"
+#include "mlp.h"
 
 static void ff_mlp_filter_channel(int32_t *state, const int32_t *coeff,
                                   int firorder, int iirorder,
diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c
index dcaaac0dc5..8b022fa8f8 100644
--- a/libavcodec/v210enc.c
+++ b/libavcodec/v210enc.c
@@ -22,7 +22,7 @@
  */
 
 #include "avcodec.h"
-#include "libavcodec/bytestream.h"
+#include "bytestream.h"
 
 static av_cold int encode_init(AVCodecContext *avctx)
 {
diff --git a/libavformat/mmsh.c b/libavformat/mmsh.c
index 203691e2fc..b19973ea76 100644
--- a/libavformat/mmsh.c
+++ b/libavformat/mmsh.c
@@ -28,7 +28,7 @@
 #include <string.h>
 #include "libavutil/intreadwrite.h"
 #include "libavutil/avstring.h"
-#include "libavformat/internal.h"
+#include "internal.h"
 #include "mms.h"
 #include "asf.h"
 #include "http.h"
diff --git a/libavutil/audioconvert.c b/libavutil/audioconvert.c
index f789c77e20..fa27f0168b 100644
--- a/libavutil/audioconvert.c
+++ b/libavutil/audioconvert.c
@@ -23,8 +23,8 @@
  * audio conversion routines
  */
 
-#include "libavutil/avutil.h"
-#include "libavutil/avstring.h"
+#include "avstring.h"
+#include "avutil.h"
 #include "audioconvert.h"
 
 static const char * const channel_names[] = {
diff --git a/libavutil/eval.c b/libavutil/eval.c
index a6a59a39ef..a3788210e3 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c
@@ -26,7 +26,7 @@
  * see http://joe.hotchkiss.com/programming/eval/eval.html
  */
 
-#include "libavutil/avutil.h"
+#include "avutil.h"
 #include "eval.h"
 
 typedef struct Parser {
diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
index 544007f120..46853cafcb 100644
--- a/libavutil/imgutils.c
+++ b/libavutil/imgutils.c
@@ -23,7 +23,7 @@
 
 #include "imgutils.h"
 #include "internal.h"
-#include "libavutil/pixdesc.h"
+#include "pixdesc.h"
 
 void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
                                 const AVPixFmtDescriptor *pixdesc)
diff --git a/libavutil/imgutils.h b/libavutil/imgutils.h
index 7abeb657fb..b569eb1ca4 100644
--- a/libavutil/imgutils.h
+++ b/libavutil/imgutils.h
@@ -24,8 +24,8 @@
  * misc image utilities
  */
 
-#include "libavutil/pixdesc.h"
 #include "avutil.h"
+#include "pixdesc.h"
 
 /**
  * Compute the max pixel step for each plane of an image with a
diff --git a/libavutil/parseutils.c b/libavutil/parseutils.c
index cc901314d9..0272c3ef67 100644
--- a/libavutil/parseutils.c
+++ b/libavutil/parseutils.c
@@ -24,11 +24,12 @@
 #include <strings.h>
 #include <sys/time.h>
 #include <time.h>
+
+#include "avstring.h"
+#include "avutil.h"
+#include "eval.h"
+#include "random_seed.h"
 #include "parseutils.h"
-#include "libavutil/avutil.h"
-#include "libavutil/eval.h"
-#include "libavutil/avstring.h"
-#include "libavutil/random_seed.h"
 
 typedef struct {
     const char *abbr;
diff --git a/libavutil/parseutils.h b/libavutil/parseutils.h
index c0f9aec392..befbbb56a1 100644
--- a/libavutil/parseutils.h
+++ b/libavutil/parseutils.h
@@ -19,7 +19,7 @@
 #ifndef AVUTIL_PARSEUTILS_H
 #define AVUTIL_PARSEUTILS_H
 
-#include "libavutil/rational.h"
+#include "rational.h"
 
 /**
  * @file

From e01bb264e78385367b716a55df713ae1a6750310 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 5 Jun 2011 15:27:51 +0200
Subject: [PATCH 642/830] inverse.c: Replace unnecessary intmath.h header by
 necessary stdint.h.

---
 libavutil/inverse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/inverse.c b/libavutil/inverse.c
index bca61832ef..5a5c490e0a 100644
--- a/libavutil/inverse.c
+++ b/libavutil/inverse.c
@@ -19,7 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/intmath.h"
+#include <stdint.h>
 
 /* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
  * for a>16909558, is an overestimate by less than 1 part in 1<<24 */

From 4ae28eb85351528e37ded59336e3f763497813b7 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 7 Jun 2011 02:18:19 +0200
Subject: [PATCH 643/830] showfiltfmts: use av_get_pix_fmt_name()

Use av_get_pix_fmt_name() rather than accessing
av_pix_fmt_descriptors directly. This improves readability.
---
 tools/lavfi-showfiltfmts.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lavfi-showfiltfmts.c b/tools/lavfi-showfiltfmts.c
index 19bb6f6819..cc19e00a8c 100644
--- a/tools/lavfi-showfiltfmts.c
+++ b/tools/lavfi-showfiltfmts.c
@@ -81,7 +81,7 @@ int main(int argc, char **argv)
         for (j = 0; j < fmts->format_count; j++)
             printf("INPUT[%d] %s: %s\n",
                    i, filter_ctx->filter->inputs[i].name,
-                   av_pix_fmt_descriptors[fmts->formats[j]].name);
+                   av_get_pix_fmt_name(fmts->formats[j]));
     }
 
     /* print the supported formats in output */
@@ -90,7 +90,7 @@ int main(int argc, char **argv)
         for (j = 0; j < fmts->format_count; j++)
             printf("OUTPUT[%d] %s: %s\n",
                    i, filter_ctx->filter->outputs[i].name,
-                   av_pix_fmt_descriptors[fmts->formats[j]].name);
+                   av_get_pix_fmt_name(fmts->formats[j]));
     }
 
     avfilter_free(filter_ctx);

From d7cf5639e727b454896e7d7d8da2abcd4b631eac Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 6 Jun 2011 00:39:39 +0200
Subject: [PATCH 644/830] Replace some printf instances in debug code by
 av_log.

---
 libavformat/avidec.c | 4 ++--
 libavformat/mpeg.c   | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index ff270edae5..dd2d1d0da6 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -1206,7 +1206,7 @@ static int avi_load_index(AVFormatContext *s)
     if (avio_seek(pb, avi->movi_end, SEEK_SET) < 0)
         goto the_end; // maybe truncated file
 #ifdef DEBUG_SEEK
-    printf("movi_end=0x%"PRIx64"\n", avi->movi_end);
+    av_log(s, AV_LOG_DEBUG, "movi_end=0x%"PRIx64"\n", avi->movi_end);
 #endif
     for(;;) {
         if (pb->eof_reached)
@@ -1214,7 +1214,7 @@ static int avi_load_index(AVFormatContext *s)
         tag = avio_rl32(pb);
         size = avio_rl32(pb);
 #ifdef DEBUG_SEEK
-        printf("tag=%c%c%c%c size=0x%x\n",
+        av_log(s, AV_LOG_DEBUG, "tag=%c%c%c%c size=0x%x\n",
                tag & 0xff,
                (tag >> 8) & 0xff,
                (tag >> 16) & 0xff,
diff --git a/libavformat/mpeg.c b/libavformat/mpeg.c
index 98506612cd..86cee0c4b8 100644
--- a/libavformat/mpeg.c
+++ b/libavformat/mpeg.c
@@ -593,7 +593,7 @@ static int64_t mpegps_read_dts(AVFormatContext *s, int stream_index,
         len = mpegps_read_pes_header(s, &pos, &startcode, &pts, &dts);
         if (len < 0) {
 #ifdef DEBUG_SEEK
-            printf("none (ret=%d)\n", len);
+            av_log(s, AV_LOG_DEBUG, "none (ret=%d)\n", len);
 #endif
             return AV_NOPTS_VALUE;
         }
@@ -604,7 +604,8 @@ static int64_t mpegps_read_dts(AVFormatContext *s, int stream_index,
         avio_skip(s->pb, len);
     }
 #ifdef DEBUG_SEEK
-    printf("pos=0x%"PRIx64" dts=0x%"PRIx64" %0.3f\n", pos, dts, dts / 90000.0);
+    av_log(s, AV_LOG_DEBUG, "pos=0x%"PRIx64" dts=0x%"PRIx64" %0.3f\n",
+           pos, dts, dts / 90000.0);
 #endif
     *ppos = pos;
     return dts;

From f8ea0eb6ff5719a3ff2a60454dd0a2b07aa6dbe2 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 6 Jun 2011 00:52:28 +0200
Subject: [PATCH 645/830] svq1dec: Fix debug statements that referenced
 non-existing context.

---
 libavcodec/svq1dec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c
index fc36bc3489..2a80374569 100644
--- a/libavcodec/svq1dec.c
+++ b/libavcodec/svq1dec.c
@@ -237,7 +237,7 @@ static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int
     }
 
     if ((stages > 0) && (level >= 4)) {
-      av_dlog(s->avctx,
+      av_dlog(NULL,
               "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",
               stages, level);
       return -1;        /* invalid vector */
@@ -287,7 +287,7 @@ static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels,
     if (stages == -1) continue; /* skip vector */
 
     if ((stages > 0) && (level >= 4)) {
-      av_dlog(s->avctx,
+      av_dlog(NULL,
               "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",
               stages, level);
       return -1;        /* invalid vector */

From 1f6b9cc31d086860c7a7887685bed321fe3843f4 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 6 Jun 2011 00:53:31 +0200
Subject: [PATCH 646/830] Replace some nonstandard DEBUG_* preprocessor
 directives by plain DEBUG.

---
 ffplay.c                               | 5 +----
 libavcodec/dvbsubdec.c                 | 7 +++----
 libavcodec/ppc/h264_template_altivec.c | 3 +--
 libavformat/rtspdec.c                  | 7 -------
 4 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/ffplay.c b/ffplay.c
index e036bbdf94..4ccad0618d 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -56,9 +56,6 @@
 const char program_name[] = "ffplay";
 const int program_birth_year = 2003;
 
-//#define DEBUG
-//#define DEBUG_SYNC
-
 #define MAX_QUEUE_SIZE (15 * 1024 * 1024)
 #define MIN_AUDIOQ_SIZE (20 * 16 * 1024)
 #define MIN_FRAMES 5
@@ -2056,7 +2053,7 @@ static int audio_decode_frame(VideoState *is, double *pts_ptr)
             n = 2 * dec->channels;
             is->audio_clock += (double)data_size /
                 (double)(n * dec->sample_rate);
-#if defined(DEBUG_SYNC)
+#ifdef DEBUG
             {
                 static double last_clock;
                 printf("audio: delay=%0.3f clock=%0.3f pts=%0.3f\n",
diff --git a/libavcodec/dvbsubdec.c b/libavcodec/dvbsubdec.c
index 2c27f6b527..b6b57b8915 100644
--- a/libavcodec/dvbsubdec.c
+++ b/libavcodec/dvbsubdec.c
@@ -26,7 +26,6 @@
 
 //#define DEBUG
 //#define DEBUG_PACKET_CONTENTS
-//#define DEBUG_SAVE_IMAGES
 
 #define DVBSUB_PAGE_SEGMENT     0x10
 #define DVBSUB_REGION_SEGMENT   0x11
@@ -37,7 +36,7 @@
 
 #define cm (ff_cropTbl + MAX_NEG_CROP)
 
-#ifdef DEBUG_SAVE_IMAGES
+#ifdef DEBUG
 #undef fprintf
 #if 0
 static void png_save(const char *filename, uint8_t *bitmap, int w, int h,
@@ -1174,7 +1173,7 @@ static void dvbsub_parse_page_segment(AVCodecContext *avctx,
 }
 
 
-#ifdef DEBUG_SAVE_IMAGES
+#ifdef DEBUG
 static void save_display_set(DVBSubContext *ctx)
 {
     DVBSubRegion *region;
@@ -1387,7 +1386,7 @@ static int dvbsub_display_end_segment(AVCodecContext *avctx, const uint8_t *buf,
 
     sub->num_rects = i;
 
-#ifdef DEBUG_SAVE_IMAGES
+#ifdef DEBUG
     save_display_set(ctx);
 #endif
 
diff --git a/libavcodec/ppc/h264_template_altivec.c b/libavcodec/ppc/h264_template_altivec.c
index 020d7c7023..2a8f4bf672 100644
--- a/libavcodec/ppc/h264_template_altivec.c
+++ b/libavcodec/ppc/h264_template_altivec.c
@@ -18,8 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-//#define DEBUG_ALIGNMENT
-#ifdef DEBUG_ALIGNMENT
+#ifdef DEBUG
 #define ASSERT_ALIGNED(ptr) assert(((unsigned long)ptr&0x0000000F));
 #else
 #define ASSERT_ALIGNED(ptr) ;
diff --git a/libavformat/rtspdec.c b/libavformat/rtspdec.c
index fa6bc05517..9cd9be1d4b 100644
--- a/libavformat/rtspdec.c
+++ b/libavformat/rtspdec.c
@@ -31,9 +31,6 @@
 #include "rdt.h"
 #include "url.h"
 
-//#define DEBUG
-//#define DEBUG_RTP_TCP
-
 static int rtsp_read_play(AVFormatContext *s)
 {
     RTSPState *rt = s->priv_data;
@@ -191,9 +188,7 @@ int ff_rtsp_tcp_read_packet(AVFormatContext *s, RTSPStream **prtsp_st,
     int id, len, i, ret;
     RTSPStream *rtsp_st;
 
-#ifdef DEBUG_RTP_TCP
     av_dlog(s, "tcp_read_packet:\n");
-#endif
 redo:
     for (;;) {
         RTSPMessageHeader reply;
@@ -212,9 +207,7 @@ redo:
         return -1;
     id  = buf[0];
     len = AV_RB16(buf + 1);
-#ifdef DEBUG_RTP_TCP
     av_dlog(s, "id=%d len=%d\n", id, len);
-#endif
     if (len > buf_size || len < 12)
         goto redo;
     /* get the data */

From 02a8d43adf7063bda2d20039d3e9b19d8469fe69 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 6 Jun 2011 01:25:32 +0200
Subject: [PATCH 647/830] Replace some av_log/printf + #ifdef combinations by
 av_dlog.

---
 libavcodec/bitstream.c     | 26 ++++++------------------
 libavcodec/dv.c            | 28 +++++++-------------------
 libavcodec/dvbsub_parser.c | 16 ++++-----------
 libavcodec/dvbsubdec.c     | 41 ++++++++++++--------------------------
 libavcodec/pgssubdec.c     | 13 ++++--------
 libavformat/mov.c          |  9 +++------
 6 files changed, 37 insertions(+), 96 deletions(-)

diff --git a/libavcodec/bitstream.c b/libavcodec/bitstream.c
index 329ec9527e..97d5b49105 100644
--- a/libavcodec/bitstream.c
+++ b/libavcodec/bitstream.c
@@ -81,8 +81,6 @@ void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
 
 /* VLC decoding */
 
-//#define DEBUG_VLC
-
 #define GET_DATA(v, table, i, wrap, size) \
 {\
     const uint8_t *ptr = (const uint8_t *)table + i * wrap;\
@@ -162,10 +160,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
 
     table_size = 1 << table_nb_bits;
     table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
-#ifdef DEBUG_VLC
-    av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n",
-           table_index, table_size);
-#endif
+    av_dlog(NULL, "new table index=%d size=%d\n", table_index, table_size);
     if (table_index < 0)
         return -1;
     table = &vlc->table[table_index];
@@ -180,9 +175,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
         n = codes[i].bits;
         code = codes[i].code;
         symbol = codes[i].symbol;
-#if defined(DEBUG_VLC) && 0
-        av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code);
-#endif
+        av_dlog(NULL, "i=%d n=%d code=0x%x\n", i, n, code);
         if (n <= table_nb_bits) {
             /* no need to add another table */
             j = code >> (32 - table_nb_bits);
@@ -193,10 +186,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
                 inc = 1 << n;
             }
             for (k = 0; k < nb; k++) {
-#ifdef DEBUG_VLC
-                av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n",
-                       j, i, n);
-#endif
+                av_dlog(NULL, "%4x: code=%d n=%d\n", j, i, n);
                 if (table[j][1] /*bits*/ != 0) {
                     av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
                     return -1;
@@ -226,10 +216,8 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
             subtable_bits = FFMIN(subtable_bits, table_nb_bits);
             j = (flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix;
             table[j][1] = -subtable_bits;
-#ifdef DEBUG_VLC
-            av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n",
-                   j, codes[i].bits + table_nb_bits);
-#endif
+            av_dlog(NULL, "%4x: n=%d (subtable)\n",
+                    j, codes[i].bits + table_nb_bits);
             index = build_table(vlc, subtable_bits, k-i, codes+i, flags);
             if (index < 0)
                 return -1;
@@ -291,9 +279,7 @@ int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
         vlc->table_size = 0;
     }
 
-#ifdef DEBUG_VLC
-    av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes);
-#endif
+    av_dlog(NULL, "build table nb_codes=%d\n", nb_codes);
 
     buf = av_malloc((nb_codes+1)*sizeof(VLCcode));
 
diff --git a/libavcodec/dv.c b/libavcodec/dv.c
index cd682ecb2b..4fde2a7899 100644
--- a/libavcodec/dv.c
+++ b/libavcodec/dv.c
@@ -358,9 +358,6 @@ static av_cold int dvvideo_init_encoder(AVCodecContext *avctx)
     return dvvideo_init(avctx);
 }
 
-// #define VLC_DEBUG
-// #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__)
-
 typedef struct BlockInfo {
     const uint32_t *factor_table;
     const uint8_t *scan_table;
@@ -404,9 +401,8 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
 
     /* get the AC coefficients until last_index is reached */
     for (;;) {
-#ifdef VLC_DEBUG
-        printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index);
-#endif
+        av_dlog(NULL, "%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16),
+                re_index);
         /* our own optimized GET_RL_VLC */
         index   = NEG_USR32(re_cache, TEX_VLC_BITS);
         vlc_len = dv_rl_vlc[index].len;
@@ -427,9 +423,7 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
         }
         re_index += vlc_len;
 
-#ifdef VLC_DEBUG
-        printf("run=%d level=%d\n", run, level);
-#endif
+        av_dlog(NULL, "run=%d level=%d\n", run, level);
         pos += run;
         if (pos >= 64)
             break;
@@ -533,9 +527,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
             mb->pos               = 0;
             mb->partial_bit_count = 0;
 
-#ifdef VLC_DEBUG
-            printf("MB block: %d, %d ", mb_index, j);
-#endif
+            av_dlog(avctx, "MB block: %d, %d ", mb_index, j);
             dv_decode_ac(&gb, mb, block);
 
             /* write the remaining bits  in a new buffer only if the
@@ -548,9 +540,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
         }
 
         /* pass 2 : we can do it just after */
-#ifdef VLC_DEBUG
-        printf("***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index);
-#endif
+        av_dlog(avctx, "***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index);
         block = block1;
         mb    = mb1;
         init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb));
@@ -570,9 +560,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
     }
 
     /* we need a pass other the whole video segment */
-#ifdef VLC_DEBUG
-    printf("***pass 3 size=%d\n", put_bits_count(&vs_pb));
-#endif
+    av_dlog(avctx, "***pass 3 size=%d\n", put_bits_count(&vs_pb));
     block = &sblock[0][0];
     mb    = mb_data;
     init_get_bits(&gb, vs_bit_buffer, put_bits_count(&vs_pb));
@@ -580,9 +568,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
     for (mb_index = 0; mb_index < 5; mb_index++) {
         for (j = 0; j < s->sys->bpm; j++) {
             if (mb->pos < 64) {
-#ifdef VLC_DEBUG
-                printf("start %d:%d\n", mb_index, j);
-#endif
+                av_dlog(avctx, "start %d:%d\n", mb_index, j);
                 dv_decode_ac(&gb, mb, block);
             }
             if (mb->pos >= 64 && mb->pos < 127)
diff --git a/libavcodec/dvbsub_parser.c b/libavcodec/dvbsub_parser.c
index b54aac6b99..8f3b0653d4 100644
--- a/libavcodec/dvbsub_parser.c
+++ b/libavcodec/dvbsub_parser.c
@@ -22,9 +22,6 @@
 #include "dsputil.h"
 #include "get_bits.h"
 
-//#define DEBUG
-//#define DEBUG_PACKET_CONTENTS
-
 /* Parser (mostly) copied from dvdsub.c */
 
 #define PARSE_BUF_SIZE  (65536)
@@ -53,25 +50,20 @@ static int dvbsub_parse(AVCodecParserContext *s,
 {
     DVBSubParseContext *pc = s->priv_data;
     uint8_t *p, *p_end;
-    int len, buf_pos = 0;
+    int i, len, buf_pos = 0;
 
     av_dlog(avctx, "DVB parse packet pts=%"PRIx64", lpts=%"PRIx64", cpts=%"PRIx64":\n",
             s->pts, s->last_pts, s->cur_frame_pts[s->cur_frame_start_index]);
 
-#ifdef DEBUG_PACKET_CONTENTS
-    int i;
-
     for (i=0; i < buf_size; i++)
     {
-        av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]);
+        av_dlog(avctx, "%02x ", buf[i]);
         if (i % 16 == 15)
-            av_log(avctx, AV_LOG_INFO, "\n");
+            av_dlog(avctx, "\n");
     }
 
     if (i % 16 != 0)
-        av_log(avctx, AV_LOG_INFO, "\n");
-
-#endif
+        av_dlog(avctx, "\n");
 
     *poutbuf = NULL;
     *poutbuf_size = 0;
diff --git a/libavcodec/dvbsubdec.c b/libavcodec/dvbsubdec.c
index b6b57b8915..66659ae637 100644
--- a/libavcodec/dvbsubdec.c
+++ b/libavcodec/dvbsubdec.c
@@ -24,9 +24,6 @@
 #include "bytestream.h"
 #include "libavutil/colorspace.h"
 
-//#define DEBUG
-//#define DEBUG_PACKET_CONTENTS
-
 #define DVBSUB_PAGE_SEGMENT     0x10
 #define DVBSUB_REGION_SEGMENT   0x11
 #define DVBSUB_CLUT_SEGMENT     0x12
@@ -749,20 +746,17 @@ static void dvbsub_parse_pixel_data_block(AVCodecContext *avctx, DVBSubObjectDis
     av_dlog(avctx, "DVB pixel block size %d, %s field:\n", buf_size,
             top_bottom ? "bottom" : "top");
 
-#ifdef DEBUG_PACKET_CONTENTS
     for (i = 0; i < buf_size; i++) {
         if (i % 16 == 0)
-            av_log(avctx, AV_LOG_INFO, "0x%08p: ", buf+i);
+            av_dlog(avctx, "0x%8p: ", buf+i);
 
-        av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]);
+        av_dlog(avctx, "%02x ", buf[i]);
         if (i % 16 == 15)
-            av_log(avctx, AV_LOG_INFO, "\n");
+            av_dlog(avctx, "\n");
     }
 
     if (i % 16)
-        av_log(avctx, AV_LOG_INFO, "\n");
-
-#endif
+        av_dlog(avctx, "\n");
 
     if (region == 0)
         return;
@@ -911,27 +905,22 @@ static void dvbsub_parse_clut_segment(AVCodecContext *avctx,
     DVBSubContext *ctx = avctx->priv_data;
 
     const uint8_t *buf_end = buf + buf_size;
-    int clut_id;
+    int i, clut_id;
     DVBSubCLUT *clut;
     int entry_id, depth , full_range;
     int y, cr, cb, alpha;
     int r, g, b, r_add, g_add, b_add;
 
-#ifdef DEBUG_PACKET_CONTENTS
-    int i;
-
-    av_log(avctx, AV_LOG_INFO, "DVB clut packet:\n");
+    av_dlog(avctx, "DVB clut packet:\n");
 
     for (i=0; i < buf_size; i++) {
-        av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]);
+        av_dlog(avctx, "%02x ", buf[i]);
         if (i % 16 == 15)
-            av_log(avctx, AV_LOG_INFO, "\n");
+            av_dlog(avctx, "\n");
     }
 
     if (i % 16)
-        av_log(avctx, AV_LOG_INFO, "\n");
-
-#endif
+        av_dlog(avctx, "\n");
 
     clut_id = *buf++;
     buf += 1;
@@ -1405,22 +1394,18 @@ static int dvbsub_decode(AVCodecContext *avctx,
     int segment_type;
     int page_id;
     int segment_length;
-
-#ifdef DEBUG_PACKET_CONTENTS
     int i;
 
-    av_log(avctx, AV_LOG_INFO, "DVB sub packet:\n");
+    av_dlog(avctx, "DVB sub packet:\n");
 
     for (i=0; i < buf_size; i++) {
-        av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]);
+        av_dlog(avctx, "%02x ", buf[i]);
         if (i % 16 == 15)
-            av_log(avctx, AV_LOG_INFO, "\n");
+            av_dlog(avctx, "\n");
     }
 
     if (i % 16)
-        av_log(avctx, AV_LOG_INFO, "\n");
-
-#endif
+        av_dlog(avctx, "\n");
 
     if (buf_size <= 6 || *buf != 0x0f) {
         av_dlog(avctx, "incomplete or broken packet");
diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
index a480da1168..9546c38882 100644
--- a/libavcodec/pgssubdec.c
+++ b/libavcodec/pgssubdec.c
@@ -30,8 +30,6 @@
 #include "libavutil/colorspace.h"
 #include "libavutil/imgutils.h"
 
-//#define DEBUG_PACKET_CONTENTS
-
 #define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
 
 enum SegmentType {
@@ -404,21 +402,18 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size,
     const uint8_t *buf_end;
     uint8_t       segment_type;
     int           segment_length;
-
-#ifdef DEBUG_PACKET_CONTENTS
     int i;
 
-    av_log(avctx, AV_LOG_INFO, "PGS sub packet:\n");
+    av_dlog(avctx, "PGS sub packet:\n");
 
     for (i = 0; i < buf_size; i++) {
-        av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]);
+        av_dlog(avctx, "%02x ", buf[i]);
         if (i % 16 == 15)
-            av_log(avctx, AV_LOG_INFO, "\n");
+            av_dlog(avctx, "\n");
     }
 
     if (i & 15)
-        av_log(avctx, AV_LOG_INFO, "\n");
-#endif
+        av_dlog(avctx, "\n");
 
     *data_size = 0;
 
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 6c3c95caa2..3aec86ceb1 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -23,7 +23,6 @@
 #include <limits.h>
 
 //#define DEBUG
-//#define DEBUG_METADATA
 //#define MOV_EXPORT_ALL_METADATA
 
 #include "libavutil/intreadwrite.h"
@@ -210,11 +209,9 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             av_metadata_set2(&c->fc->metadata, key2, str, 0);
         }
     }
-#ifdef DEBUG_METADATA
-    av_log(c->fc, AV_LOG_DEBUG, "lang \"%3s\" ", language);
-    av_log(c->fc, AV_LOG_DEBUG, "tag \"%s\" value \"%s\" atom \"%.4s\" %d %lld\n",
-           key, str, (char*)&atom.type, str_size, atom.size);
-#endif
+    av_dlog(c->fc, "lang \"%3s\" ", language);
+    av_dlog(c->fc, "tag \"%s\" value \"%s\" atom \"%.4s\" %d %"PRId64"\n",
+            key, str, (char*)&atom.type, str_size, atom.size);
 
     return 0;
 }

From e8f6bd4b88405526dea26daaf86e94509e7144df Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 6 Jun 2011 13:32:37 +0200
Subject: [PATCH 648/830] configure: Document --enable-vdpau.

The option is disabled by default, so enabling it should be documented.
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 34c1ed6d25..2a1883bfa0 100755
--- a/configure
+++ b/configure
@@ -107,7 +107,7 @@ Configuration options:
   --disable-mdct           disable MDCT code
   --disable-rdft           disable RDFT code
   --enable-vaapi           enable VAAPI code
-  --disable-vdpau          disable VDPAU code
+  --enable-vdpau           enable VDPAU code
   --disable-dxva2          disable DXVA2 code
   --enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
   --enable-hardcoded-tables use hardcoded tables instead of runtime generation

From 7a8228c05e23d85b11a3853bf25a7ddbc662e2e3 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 7 Jun 2011 13:22:36 +0200
Subject: [PATCH 649/830] sws: make dither_scale const

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswscale/swscale.c          | 2 +-
 libswscale/swscale_internal.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 29c26e87ac..68258f2f69 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -271,7 +271,7 @@ DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
 
 static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
 
-uint16_t dither_scale[15][16]={
+const uint16_t dither_scale[15][16]={
 {    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
 {    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
 {    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 16e7d3288c..1403e7dd20 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -498,7 +498,7 @@ const char *sws_format_name(enum PixelFormat format);
 extern const uint64_t ff_dither4[2];
 extern const uint64_t ff_dither8[2];
 extern const uint8_t dithers[8][8][8];
-extern uint16_t dither_scale[15][16];
+extern const uint16_t dither_scale[15][16];
 
 
 extern const AVClass sws_context_class;

From 3251af90379e69faf7b13e2d0236056c37bb0922 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 7 Jun 2011 13:36:06 +0200
Subject: [PATCH 650/830] v4l2: set default standard to NULL

It was erroneously changed to "NTSC" by the recent merge commit.
---
 libavdevice/v4l2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 117f7cc534..7223654891 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -710,7 +710,7 @@ static int v4l2_read_close(AVFormatContext *s1)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 
 static const AVOption options[] = {
-    { "standard", "", OFFSET(standard), FF_OPT_TYPE_STRING, {.str = "NTSC" }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
+    { "standard", "", OFFSET(standard), FF_OPT_TYPE_STRING, {.str = NULL }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
     { "channel",  "", OFFSET(channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },

From 2d9570a304054c009b7f3fee60fff1299f25c7f5 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 7 Jun 2011 14:02:38 +0200
Subject: [PATCH 651/830] vorbisdec: Restore mistakenly removed debug output.

---
 libavcodec/vorbisdec.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index b969d45783..017102e777 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -20,6 +20,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <inttypes.h>
 #include <math.h>
 
 #define ALT_BITSTREAM_READER_LE
@@ -1089,6 +1090,9 @@ static int vorbis_floor0_decode(vorbis_context *vc,
             for (i = 0; i < order; i++)
                 lsp[i] = 2.0f * cos(lsp[i]);
 
+            av_dlog(NULL, "floor0 synth: map_size = %"PRIu32"; m = %d; wstep = %f\n",
+                    vf->map_size[blockflag], order, wstep);
+
             i = 0;
             while (i < vf->map_size[blockflag]) {
                 int j, iter_cond = vf->map[blockflag][i];

From b9478cfefb408ae1d476555b46893005e2d3bf4c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 7 Jun 2011 10:52:30 +0200
Subject: [PATCH 652/830] shorten: Remove stray DEBUG #define and corresponding
 av_dlog statement.

---
 libavcodec/shorten.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index 86e981f417..a6e00750e9 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -26,7 +26,6 @@
  *
  */
 
-#define DEBUG
 #include <limits.h>
 #include "avcodec.h"
 #include "get_bits.h"
@@ -304,7 +303,6 @@ static int shorten_decode_frame(AVCodecContext *avctx,
         s->bitstream_size= buf_size;
 
         if(buf_size < s->max_framesize){
-            //av_dlog(avctx, "wanna more data ... %d\n", buf_size);
             *data_size = 0;
             return input_buf_size;
         }

From bda9b20fa49975c6b9c39308818e7773eb78b411 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 21:15:44 -0400
Subject: [PATCH 653/830] swscale: un-special-case yuv2yuvX16_c().

Make yuv2yuvX16_c a function pointer for yuv2yuvX(), so that the
function pointer becomes bitdepth-independent.
---
 libswscale/ppc/swscale_altivec.c  |  6 ++-
 libswscale/swscale.c              | 63 +++++++++++++++++--------------
 libswscale/x86/swscale_template.c |  5 ++-
 3 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 7f4dfcd6f1..423297a8f0 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -403,11 +403,15 @@ static void hScale_altivec_real(int16_t *dst, int dstW,
 
 void ff_sws_init_swScale_altivec(SwsContext *c)
 {
+    enum PixelFormat dstFormat = c->dstFormat;
+
     if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
         return;
 
     c->hScale       = hScale_altivec_real;
-    c->yuv2yuvX     = yuv2yuvX_altivec_real;
+    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) {
+        c->yuv2yuvX     = yuv2yuvX_altivec_real;
+    }
 
     /* The following list of supported dstFormat values should
      * match what's found in the body of ff_yuv2packedX_altivec() */
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index fd64b81019..35b0233f15 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -258,17 +258,20 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
             output_pixel(&aDest[i], val);
         }
     }
+#undef output_pixel
 }
 
 #define yuv2NBPS(bits, BE_LE, is_be) \
-static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \
+static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
                               const int16_t **lumSrc, int lumFilterSize, \
                               const int16_t *chrFilter, const int16_t **chrUSrc, \
                               const int16_t **chrVSrc, \
                               int chrFilterSize, const int16_t **alpSrc, \
-                              uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \
-                              uint16_t *aDest, int dstW, int chrDstW) \
+                              uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
+                              uint8_t *_aDest, int dstW, int chrDstW) \
 { \
+    uint16_t *dest  = (uint16_t *) _dest,  *uDest = (uint16_t *) _uDest, \
+             *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
     yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                           alpSrc, \
@@ -282,24 +285,24 @@ yuv2NBPS(10, LE, 0);
 yuv2NBPS(16, BE, 1);
 yuv2NBPS(16, LE, 0);
 
-static inline void yuv2yuvX16_c(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void yuv2yuvX16_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
-                                const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
+                                const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
                                 enum PixelFormat dstFormat)
 {
 #define conv16(bits) \
     if (isBE(dstFormat)) { \
-        yuv2yuvX ## bits ## BE_c(lumFilter, lumSrc, lumFilterSize, \
-                               chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                               alpSrc, \
-                               dest, uDest, vDest, aDest, \
-                               dstW, chrDstW); \
+        yuv2yuvX ## bits ## BE_c(c, lumFilter, lumSrc, lumFilterSize, \
+                                 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                                 alpSrc, \
+                                 dest, uDest, vDest, aDest, \
+                                 dstW, chrDstW); \
     } else { \
-        yuv2yuvX ## bits ## LE_c(lumFilter, lumSrc, lumFilterSize, \
-                               chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                               alpSrc, \
-                               dest, uDest, vDest, aDest, \
-                               dstW, chrDstW); \
+        yuv2yuvX ## bits ## LE_c(c, lumFilter, lumSrc, lumFilterSize, \
+                                 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                                 alpSrc, \
+                                 dest, uDest, vDest, aDest, \
+                                 dstW, chrDstW); \
     }
     if (is16BPS(dstFormat)) {
         conv16(16);
@@ -1783,14 +1786,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16_c(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
-                                 chrVSrcPtr, vChrFilterSize,
-                                 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
-                                 (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
-                                 dstFormat);
-                } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
+                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                     const int16_t *lumBuf = lumSrcPtr[0];
                     const int16_t *chrUBuf= chrUSrcPtr[0];
                     const int16_t *chrVBuf= chrVSrcPtr[0];
@@ -1870,9 +1866,9 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16_c(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                    yuv2yuvX16_c(c, vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW,
                                  dstFormat);
                 } else {
                     yuv2yuvX_c(c, vLumFilter+dstY*vLumFilterSize,
@@ -1921,11 +1917,22 @@ static int swScale(SwsContext *c, const uint8_t* src[],
 
 static void sws_init_swScale_c(SwsContext *c)
 {
-    enum PixelFormat srcFormat = c->srcFormat;
+    enum PixelFormat srcFormat = c->srcFormat,
+                     dstFormat = c->dstFormat;
 
     c->yuv2nv12X    = yuv2nv12X_c;
-    c->yuv2yuv1     = yuv2yuv1_c;
-    c->yuv2yuvX     = yuv2yuvX_c;
+    if (is16BPS(dstFormat)) {
+        c->yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
+    } else if (is9_OR_10BPS(dstFormat)) {
+        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
+            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
+        } else {
+            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
+        }
+    } else {
+        c->yuv2yuv1     = yuv2yuv1_c;
+        c->yuv2yuvX     = yuv2yuvX_c;
+    }
     c->yuv2packed1  = yuv2packed1_c;
     c->yuv2packed2  = yuv2packed2_c;
     c->yuv2packedX  = yuv2packedX_c;
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index efaadaa09e..174c3fbfd6 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2185,8 +2185,10 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d
 
 static void RENAME(sws_init_swScale)(SwsContext *c)
 {
-    enum PixelFormat srcFormat = c->srcFormat;
+    enum PixelFormat srcFormat = c->srcFormat,
+                     dstFormat = c->dstFormat;
 
+    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) {
     if (!(c->flags & SWS_BITEXACT)) {
         if (c->flags & SWS_ACCURATE_RND) {
             c->yuv2yuv1     = RENAME(yuv2yuv1_ar    );
@@ -2236,6 +2238,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
                 break;
         }
     }
+    }
 
 #if !COMPILE_TEMPLATE_MMX2
     c->hScale       = RENAME(hScale      );

From ea281753120f55fc3a3102ac54ba291d0ffcd43d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 21:37:59 -0400
Subject: [PATCH 654/830] swscale: remove alp/chr/lumSrcOffset.

They are hacks added to reuse the same scaling function for
different formats and they may cause problems when SIMD
implementations of the same functions are used along with pure
C functions.
---
 libswscale/swscale.c          | 201 +++++++++++++---------------------
 libswscale/swscale_internal.h |   4 -
 2 files changed, 76 insertions(+), 129 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 35b0233f15..d97c4e1259 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -979,93 +979,58 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
     }
 }
 
-static void rgb48ToY_c(uint8_t *dst, const uint8_t *src, int width,
-                       uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int r = src[i*6+0];
-        int g = src[i*6+2];
-        int b = src[i*6+4];
-
-        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
-static void rgb48ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                        const uint8_t *src1, const uint8_t *src2,
-                        int width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i = 0; i < width; i++) {
-        int r = src1[6*i + 0];
-        int g = src1[6*i + 2];
-        int b = src1[6*i + 4];
-
-        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
-static void rgb48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
-                             int width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i = 0; i < width; i++) {
-        int r= src1[12*i + 0] + src1[12*i + 6];
-        int g= src1[12*i + 2] + src1[12*i + 8];
-        int b= src1[12*i + 4] + src1[12*i + 10];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-    }
-}
-
-static void bgr48ToY_c(uint8_t *dst, const uint8_t *src, int width,
-                       uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b = src[i*6+0];
-        int g = src[i*6+2];
-        int r = src[i*6+4];
-
-        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
-static void bgr48ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                        const uint8_t *src1, const uint8_t *src2,
-                        int width, uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b = src1[6*i + 0];
-        int g = src1[6*i + 2];
-        int r = src1[6*i + 4];
-
-        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
-static void bgr48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
-                             int width, uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b= src1[12*i + 0] + src1[12*i + 6];
-        int g= src1[12*i + 2] + src1[12*i + 8];
-        int r= src1[12*i + 4] + src1[12*i + 10];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-    }
+#define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
+static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
+                       uint8_t *dst, const uint8_t *src, int width, \
+                       uint32_t *unused) \
+{ \
+    int i; \
+    for (i = 0; i < width; i++) { \
+        int compA = rfunc(&src[i*6+0]) >> 8; \
+        int compB = rfunc(&src[i*6+2]) >> 8; \
+        int compC = rfunc(&src[i*6+4]) >> 8; \
+ \
+        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
+    } \
+} \
+ \
+static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
+                        uint8_t *dstU, uint8_t *dstV, \
+                        const uint8_t *src1, const uint8_t *src2, \
+                        int width, uint32_t *unused) \
+{ \
+    int i; \
+    assert(src1==src2); \
+    for (i = 0; i < width; i++) { \
+        int compA = rfunc(&src1[6*i + 0]) >> 8; \
+        int compB = rfunc(&src1[6*i + 2]) >> 8; \
+        int compC = rfunc(&src1[6*i + 4]) >> 8; \
+ \
+        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
+        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
+    } \
+} \
+ \
+static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
+                            uint8_t *dstU, uint8_t *dstV, \
+                            const uint8_t *src1, const uint8_t *src2, \
+                            int width, uint32_t *unused) \
+{ \
+    int i; \
+    assert(src1==src2); \
+    for (i = 0; i < width; i++) { \
+        int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
+        int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
+        int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
+ \
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
+    } \
 }
+rgb48funcs(LE, AV_RL16, r, g, b);
+rgb48funcs(BE, AV_RB16, r, g, b);
+rgb48funcs(LE, AV_RL16, b, g, r);
+rgb48funcs(BE, AV_RB16, b, g, r);
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
 static void name ## _c(uint8_t *dst, const uint8_t *src, \
@@ -1098,6 +1063,14 @@ static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unu
     }
 }
 
+static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dst[i]= src[4*i+3];
+    }
+}
+
 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
                        const uint8_t *src, const uint8_t *dummy, \
@@ -1532,8 +1505,6 @@ static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
     void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
 
-    src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
-
     if (toYV12) {
         toYV12(formatConvBuffer, src, srcW, pal);
         src= formatConvBuffer;
@@ -1570,10 +1541,6 @@ static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int ds
                            const int16_t *hChrFilterPos, int hChrFilterSize,
                            uint8_t *formatConvBuffer, uint32_t *pal)
 {
-
-    src1 += c->chrSrcOffset;
-    src2 += c->chrSrcOffset;
-
     if (c->chrToYV12) {
         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
         c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
@@ -1969,10 +1936,10 @@ static void sws_init_swScale_c(SwsContext *c)
     }
     if (c->chrSrcHSubSample) {
         switch(srcFormat) {
-        case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half_c; break;
-        case PIX_FMT_BGR48BE:
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half_c; break;
+        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
+        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half_c;  break;
         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
@@ -1986,10 +1953,10 @@ static void sws_init_swScale_c(SwsContext *c)
         }
     } else {
         switch(srcFormat) {
-        case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_c; break;
-        case PIX_FMT_BGR48BE:
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_c; break;
+        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
+        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_c;  break;
         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
@@ -2038,37 +2005,21 @@ static void sws_init_swScale_c(SwsContext *c)
     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
-    case PIX_FMT_RGB48BE:
-    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY_c; break;
-    case PIX_FMT_BGR48BE:
-    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY_c; break;
+    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
+    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
+    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
+    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
     }
     if (c->alpPixBuf) {
         switch (srcFormat) {
-        case PIX_FMT_RGB32  :
-        case PIX_FMT_RGB32_1:
-        case PIX_FMT_BGR32  :
-        case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA_c; break;
-        case PIX_FMT_Y400A  : c->alpToYV12 = yuy2ToY_c; break;
+        case PIX_FMT_BGRA:
+        case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
+        case PIX_FMT_ABGR:
+        case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
+        case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
         }
     }
 
-    switch (srcFormat) {
-    case PIX_FMT_Y400A  :
-        c->alpSrcOffset = 1;
-        break;
-    case PIX_FMT_RGB32  :
-    case PIX_FMT_BGR32  :
-        c->alpSrcOffset = 3;
-        break;
-    case PIX_FMT_RGB48LE:
-    case PIX_FMT_BGR48LE:
-        c->lumSrcOffset = 1;
-        c->chrSrcOffset = 1;
-        c->alpSrcOffset = 1;
-        break;
-    }
-
     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
         if (c->srcRange) {
             c->lumConvertRange = lumRangeFromJpeg_c;
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 6aaa843015..a9adb3f07a 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -313,10 +313,6 @@ typedef struct SwsContext {
     void (*lumConvertRange)(uint16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
     void (*chrConvertRange)(uint16_t *dst1, uint16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.
 
-    int lumSrcOffset; ///< Offset given to luma src pointers passed to horizontal input functions.
-    int chrSrcOffset; ///< Offset given to chroma src pointers passed to horizontal input functions.
-    int alpSrcOffset; ///< Offset given to alpha src pointers passed to horizontal input functions.
-
     int needs_hcscale; ///< Set if there are chroma planes to be converted.
 
 } SwsContext;

From b3b28b080f588a63d9bcf2714daa7119ece2da61 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 21:40:04 -0400
Subject: [PATCH 655/830] swscale: cosmetics.

---
 libswscale/swscale.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index d97c4e1259..f6ed45382e 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1906,8 +1906,7 @@ static void sws_init_swScale_c(SwsContext *c)
 
     c->hScale       = hScale_c;
 
-    if (c->flags & SWS_FAST_BILINEAR)
-    {
+    if (c->flags & SWS_FAST_BILINEAR) {
         c->hyscale_fast = hyscale_fast_c;
         c->hcscale_fast = hcscale_fast_c;
     }

From ca364a5b43044bc98a7aef001fb1543b7b13411d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 21:46:16 -0400
Subject: [PATCH 656/830] swscale: extract SWS_FULL_CHR_H_INT conditional into
 init code.

---
 libswscale/ppc/swscale_altivec.c  |  2 +-
 libswscale/swscale.c              | 60 +++++++++++--------------------
 libswscale/x86/swscale_template.c |  6 ++++
 3 files changed, 28 insertions(+), 40 deletions(-)

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 423297a8f0..42e965de95 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -415,7 +415,7 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
 
     /* The following list of supported dstFormat values should
      * match what's found in the body of ff_yuv2packedX_altivec() */
-    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
+    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf &&
         (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
          c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
          c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB)) {
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index f6ed45382e..9705b14d00 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1770,50 +1770,28 @@ static int swScale(SwsContext *c, const uint8_t* src[],
             } else {
                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
-                if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
+                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                     int chrAlpha= vChrFilter[2*dstY+1];
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbX_c_full(c, //FIXME write a packed1_full function
-                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
-                                        chrVSrcPtr, vChrFilterSize,
-                                        alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
-                                       *chrVSrcPtr, *(chrVSrcPtr+1),
-                                       alpPixBuf ? *alpSrcPtr : NULL,
-                                       dest, dstW, chrAlpha, dstFormat, flags, dstY);
-                    }
-                } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
+                    c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
+                                   *chrVSrcPtr, *(chrVSrcPtr+1),
+                                   alpPixBuf ? *alpSrcPtr : NULL,
+                                   dest, dstW, chrAlpha, dstFormat, flags, dstY);
+                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                     int lumAlpha= vLumFilter[2*dstY+1];
                     int chrAlpha= vChrFilter[2*dstY+1];
                     lumMmxFilter[2]=
                     lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
                     chrMmxFilter[2]=
                     chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbX_c_full(c, //FIXME write a packed2_full function
-                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                        alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
-                                       *chrVSrcPtr, *(chrVSrcPtr+1),
-                                       alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
-                                       dest, dstW, lumAlpha, chrAlpha, dstY);
-                    }
+                    c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
+                                   *chrVSrcPtr, *(chrVSrcPtr+1),
+                                   alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
+                                   dest, dstW, lumAlpha, chrAlpha, dstY);
                 } else { //general RGB
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbX_c_full(c,
-                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                        alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packedX(c,
-                                       vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                       vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                       alpSrcPtr, dest, dstW, dstY);
-                    }
+                    c->yuv2packedX(c,
+                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                   vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                   alpSrcPtr, dest, dstW, dstY);
                 }
             }
         } else { // hmm looks like we can't use MMX here without overwriting this array's tail
@@ -1900,9 +1878,13 @@ static void sws_init_swScale_c(SwsContext *c)
         c->yuv2yuv1     = yuv2yuv1_c;
         c->yuv2yuvX     = yuv2yuvX_c;
     }
-    c->yuv2packed1  = yuv2packed1_c;
-    c->yuv2packed2  = yuv2packed2_c;
-    c->yuv2packedX  = yuv2packedX_c;
+    if(c->flags & SWS_FULL_CHR_H_INT) {
+        c->yuv2packedX = yuv2rgbX_c_full;
+    } else {
+        c->yuv2packed1  = yuv2packed1_c;
+        c->yuv2packed2  = yuv2packed2_c;
+        c->yuv2packedX  = yuv2packedX_c;
+    }
 
     c->hScale       = hScale_c;
 
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 174c3fbfd6..bd72b75326 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2193,6 +2193,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         if (c->flags & SWS_ACCURATE_RND) {
             c->yuv2yuv1     = RENAME(yuv2yuv1_ar    );
             c->yuv2yuvX     = RENAME(yuv2yuvX_ar    );
+            if (!(c->flags & SWS_FULL_CHR_H_INT)) {
             switch (c->dstFormat) {
             case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
             case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
@@ -2201,9 +2202,11 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
             case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
             default: break;
             }
+            }
         } else {
             c->yuv2yuv1     = RENAME(yuv2yuv1    );
             c->yuv2yuvX     = RENAME(yuv2yuvX    );
+            if (!(c->flags & SWS_FULL_CHR_H_INT)) {
             switch (c->dstFormat) {
             case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
             case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
@@ -2212,7 +2215,10 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
             case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
             default: break;
             }
+            }
         }
+        }
+        if (!(c->flags & SWS_FULL_CHR_H_INT)) {
         switch (c->dstFormat) {
         case PIX_FMT_RGB32:
                 c->yuv2packed1 = RENAME(yuv2rgb32_1);

From 0fb5193156053b820579df9ab4596b950eb05d4d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 21:48:51 -0400
Subject: [PATCH 657/830] swscale: reindent x86 init code.

---
 libswscale/x86/swscale_template.c | 70 +++++++++++++++----------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index bd72b75326..3646ccceff 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2189,62 +2189,62 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
                      dstFormat = c->dstFormat;
 
     if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) {
-    if (!(c->flags & SWS_BITEXACT)) {
-        if (c->flags & SWS_ACCURATE_RND) {
-            c->yuv2yuv1     = RENAME(yuv2yuv1_ar    );
-            c->yuv2yuvX     = RENAME(yuv2yuvX_ar    );
-            if (!(c->flags & SWS_FULL_CHR_H_INT)) {
-            switch (c->dstFormat) {
-            case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
-            case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
-            case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X_ar);  break;
-            case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X_ar);  break;
-            case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
-            default: break;
+        if (!(c->flags & SWS_BITEXACT)) {
+            if (c->flags & SWS_ACCURATE_RND) {
+                c->yuv2yuv1 = RENAME(yuv2yuv1_ar    );
+                c->yuv2yuvX = RENAME(yuv2yuvX_ar    );
+                if (!(c->flags & SWS_FULL_CHR_H_INT)) {
+                    switch (c->dstFormat) {
+                    case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
+                    case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
+                    case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X_ar);  break;
+                    case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X_ar);  break;
+                    case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
+                    default: break;
+                    }
+                }
+            } else {
+                c->yuv2yuv1 = RENAME(yuv2yuv1    );
+                c->yuv2yuvX = RENAME(yuv2yuvX    );
+                if (!(c->flags & SWS_FULL_CHR_H_INT)) {
+                    switch (c->dstFormat) {
+                    case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
+                    case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
+                    case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X);  break;
+                    case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X);  break;
+                    case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
+                    default: break;
+                    }
+                }
             }
-            }
-        } else {
-            c->yuv2yuv1     = RENAME(yuv2yuv1    );
-            c->yuv2yuvX     = RENAME(yuv2yuvX    );
-            if (!(c->flags & SWS_FULL_CHR_H_INT)) {
-            switch (c->dstFormat) {
-            case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
-            case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
-            case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X);  break;
-            case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X);  break;
-            case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
-            default: break;
-            }
-            }
-        }
         }
         if (!(c->flags & SWS_FULL_CHR_H_INT)) {
-        switch (c->dstFormat) {
-        case PIX_FMT_RGB32:
+            switch (c->dstFormat) {
+            case PIX_FMT_RGB32:
                 c->yuv2packed1 = RENAME(yuv2rgb32_1);
                 c->yuv2packed2 = RENAME(yuv2rgb32_2);
                 break;
-        case PIX_FMT_BGR24:
+            case PIX_FMT_BGR24:
                 c->yuv2packed1 = RENAME(yuv2bgr24_1);
                 c->yuv2packed2 = RENAME(yuv2bgr24_2);
                 break;
-        case PIX_FMT_RGB555:
+            case PIX_FMT_RGB555:
                 c->yuv2packed1 = RENAME(yuv2rgb555_1);
                 c->yuv2packed2 = RENAME(yuv2rgb555_2);
                 break;
-        case PIX_FMT_RGB565:
+            case PIX_FMT_RGB565:
                 c->yuv2packed1 = RENAME(yuv2rgb565_1);
                 c->yuv2packed2 = RENAME(yuv2rgb565_2);
                 break;
-        case PIX_FMT_YUYV422:
+            case PIX_FMT_YUYV422:
                 c->yuv2packed1 = RENAME(yuv2yuyv422_1);
                 c->yuv2packed2 = RENAME(yuv2yuyv422_2);
                 break;
-        default:
+            default:
                 break;
+            }
         }
     }
-    }
 
 #if !COMPILE_TEMPLATE_MMX2
     c->hScale       = RENAME(hScale      );

From df91d091747395384a7c585cee568f9949e6c9f2 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 22:27:45 -0400
Subject: [PATCH 658/830] swscale: integrate yuv2nv12X_c into yuv2yuvX()
 function pointers.

---
 libswscale/swscale.c          | 27 ++++++++++++---------------
 libswscale/swscale_internal.h |  6 ------
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 9705b14d00..845af3d190 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -362,10 +362,13 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                                const int16_t **lumSrc, int lumFilterSize,
                                const int16_t *chrFilter, const int16_t **chrUSrc,
-                               const int16_t **chrVSrc,
-                               int chrFilterSize, uint8_t *dest, uint8_t *uDest,
-                               int dstW, int chrDstW, enum PixelFormat dstFormat)
+                               const int16_t **chrVSrc, int chrFilterSize,
+                               const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
+                               uint8_t *vDest, uint8_t *aDest,
+                               int dstW, int chrDstW)
 {
+    enum PixelFormat dstFormat = c->dstFormat;
+
     //FIXME Optimize (just quickly written not optimized..)
     int i;
     for (i=0; i<dstW; i++) {
@@ -1743,14 +1746,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                c->yuv2nv12X(c,
-                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
-            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
+            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
@@ -1805,8 +1801,8 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                 yuv2nv12X_c(c, vLumFilter+dstY*vLumFilterSize,
                             lumSrcPtr, vLumFilterSize,
                             vChrFilter+chrDstY*vChrFilterSize,
-                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                            dest, uDest, dstW, chrDstW, dstFormat);
+                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize, NULL,
+                            dest, uDest, NULL, NULL, dstW, chrDstW);
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
@@ -1865,8 +1861,9 @@ static void sws_init_swScale_c(SwsContext *c)
     enum PixelFormat srcFormat = c->srcFormat,
                      dstFormat = c->dstFormat;
 
-    c->yuv2nv12X    = yuv2nv12X_c;
-    if (is16BPS(dstFormat)) {
+    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+        c->yuv2yuvX     = yuv2nv12X_c;
+    } else if (is16BPS(dstFormat)) {
         c->yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
     } else if (is9_OR_10BPS(dstFormat)) {
         if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index a9adb3f07a..dcf8eb522a 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -250,12 +250,6 @@ typedef struct SwsContext {
 #endif
 
     /* function pointers for swScale() */
-    void (*yuv2nv12X  )(struct SwsContext *c,
-                        const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrUSrc,
-                        const int16_t **chrVSrc, int chrFilterSize,
-                        uint8_t *dest, uint8_t *uDest,
-                        int dstW, int chrDstW, int dstFormat);
     void (*yuv2yuv1   )(struct SwsContext *c,
                         const int16_t *lumSrc, const int16_t *chrUSrc,
                         const int16_t *chrVSrc, const int16_t *alpSrc,

From b73fe700253f1e93c3ca10f72fc8159d7e12aaa1 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 22:31:11 -0400
Subject: [PATCH 659/830] swscale: add yuv2planar/packed function typedefs.

---
 libswscale/swscale_internal.h | 74 +++++++++++++++++++----------------
 1 file changed, 40 insertions(+), 34 deletions(-)

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index dcf8eb522a..013eef9e31 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -57,6 +57,41 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
                        int srcStride[], int srcSliceY, int srcSliceH,
                        uint8_t* dst[], int dstStride[]);
 
+typedef void (*yuv2planar1_fn) (struct SwsContext *c,
+                                const int16_t *lumSrc, const int16_t *chrUSrc,
+                                const int16_t *chrVSrc, const int16_t *alpSrc,
+                                uint8_t *dest,
+                                uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
+                                int dstW, int chrDstW);
+typedef void (*yuv2planarX_fn) (struct SwsContext *c,
+                                const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc, int chrFilterSize,
+                                const int16_t **alpSrc,
+                                uint8_t *dest,
+                                uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
+                                int dstW, int chrDstW);
+typedef void (*yuv2packed1_fn) (struct SwsContext *c,
+                                const uint16_t *buf0,
+                                const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                const uint16_t *abuf0,
+                                uint8_t *dest,
+                                int dstW, int uvalpha, int dstFormat, int flags, int y);
+typedef void (*yuv2packed2_fn) (struct SwsContext *c,
+                                const uint16_t *buf0, const uint16_t *buf1,
+                                const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                const uint16_t *abuf0, const uint16_t *abuf1,
+                                uint8_t *dest,
+                                int dstW, int yalpha, int uvalpha, int y);
+typedef void (*yuv2packedX_fn) (struct SwsContext *c,
+                                const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc, int chrFilterSize,
+                                const int16_t **alpSrc, uint8_t *dest,
+                                int dstW, int dstY);
+
 /* This struct should be aligned on at least a 32-byte boundary. */
 typedef struct SwsContext {
     /**
@@ -250,40 +285,11 @@ typedef struct SwsContext {
 #endif
 
     /* function pointers for swScale() */
-    void (*yuv2yuv1   )(struct SwsContext *c,
-                        const int16_t *lumSrc, const int16_t *chrUSrc,
-                        const int16_t *chrVSrc, const int16_t *alpSrc,
-                        uint8_t *dest,
-                        uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        int dstW, int chrDstW);
-    void (*yuv2yuvX   )(struct SwsContext *c,
-                        const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrUSrc,
-                        const int16_t **chrVSrc, int chrFilterSize,
-                        const int16_t **alpSrc,
-                        uint8_t *dest,
-                        uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        int dstW, int chrDstW);
-    void (*yuv2packed1)(struct SwsContext *c,
-                        const uint16_t *buf0,
-                        const uint16_t *ubuf0, const uint16_t *ubuf1,
-                        const uint16_t *vbuf0, const uint16_t *vbuf1,
-                        const uint16_t *abuf0,
-                        uint8_t *dest,
-                        int dstW, int uvalpha, int dstFormat, int flags, int y);
-    void (*yuv2packed2)(struct SwsContext *c,
-                        const uint16_t *buf0, const uint16_t *buf1,
-                        const uint16_t *ubuf0, const uint16_t *ubuf1,
-                        const uint16_t *vbuf0, const uint16_t *vbuf1,
-                        const uint16_t *abuf0, const uint16_t *abuf1,
-                        uint8_t *dest,
-                        int dstW, int yalpha, int uvalpha, int y);
-    void (*yuv2packedX)(struct SwsContext *c,
-                        const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrUSrc,
-                        const int16_t **chrVSrc, int chrFilterSize,
-                        const int16_t **alpSrc, uint8_t *dest,
-                        int dstW, int dstY);
+    yuv2planar1_fn yuv2yuv1;
+    yuv2planarX_fn yuv2yuvX;
+    yuv2packed1_fn yuv2packed1;
+    yuv2packed2_fn yuv2packed2;
+    yuv2packedX_fn yuv2packedX;
 
     void (*lumToYV12)(uint8_t *dst, const uint8_t *src,
                       int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.

From edeb56fa31071841125f723b741858a4187ee748 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 22:54:30 -0400
Subject: [PATCH 660/830] swscale: remove duplicate conversion routine in
 swScale().

---
 libswscale/swscale.c | 126 ++++++++++++++++++-------------------------
 1 file changed, 53 insertions(+), 73 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 845af3d190..8f41547be1 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1562,6 +1562,37 @@ static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int ds
         c->chrConvertRange(dst1, dst2, dstWidth);
 }
 
+static av_always_inline void
+find_c_packed_planar_out_funcs(SwsContext *c,
+                               yuv2planar1_fn *yuv2yuv1,    yuv2planarX_fn *yuv2yuvX,
+                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
+                               yuv2packedX_fn *yuv2packedX)
+{
+    enum PixelFormat dstFormat = c->dstFormat;
+
+    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+        *yuv2yuvX     = yuv2nv12X_c;
+    } else if (is16BPS(dstFormat)) {
+        *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
+    } else if (is9_OR_10BPS(dstFormat)) {
+        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
+            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
+        } else {
+            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
+        }
+    } else {
+        *yuv2yuv1     = yuv2yuv1_c;
+        *yuv2yuvX     = yuv2yuvX_c;
+    }
+    if(c->flags & SWS_FULL_CHR_H_INT) {
+        *yuv2packedX = yuv2rgbX_c_full;
+    } else {
+        *yuv2packed1  = yuv2packed1_c;
+        *yuv2packed2  = yuv2packed2_c;
+        *yuv2packedX  = yuv2packedX_c;
+    }
+}
+
 #define DEBUG_SWSCALE_BUFFERS 0
 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
 
@@ -1605,6 +1636,11 @@ static int swScale(SwsContext *c, const uint8_t* src[],
     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
     int lastDstY;
     uint32_t *pal=c->pal_yuv;
+    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
+    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
+    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
+    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
+    yuv2packedX_fn yuv2packedX = c->yuv2packedX;
 
     /* vars which will change and which we need to store back in the context */
     int dstY= c->dstY;
@@ -1741,7 +1777,14 @@ static int swScale(SwsContext *c, const uint8_t* src[],
 #if HAVE_MMX
         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
 #endif
-        if (dstY < dstH-2) {
+        if (dstY >= dstH-2) {
+            // hmm looks like we can't use MMX here without overwriting this array's tail
+            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
+                                           &yuv2packed1, &yuv2packed2,
+                                           &yuv2packedX);
+        }
+
+        {
             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
@@ -1754,10 +1797,10 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                     const int16_t *chrUBuf= chrUSrcPtr[0];
                     const int16_t *chrVBuf= chrVSrcPtr[0];
                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
-                    c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
+                    yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
                                 uDest, vDest, aDest, dstW, chrDstW);
                 } else { //General YV12
-                    c->yuv2yuvX(c,
+                    yuv2yuvX(c,
                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
                                 chrVSrcPtr, vChrFilterSize,
@@ -1768,7 +1811,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                     int chrAlpha= vChrFilter[2*dstY+1];
-                    c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
+                    yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
                                    *chrVSrcPtr, *(chrVSrcPtr+1),
                                    alpPixBuf ? *alpSrcPtr : NULL,
                                    dest, dstW, chrAlpha, dstFormat, flags, dstY);
@@ -1779,61 +1822,17 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                     lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
                     chrMmxFilter[2]=
                     chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
-                    c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
+                    yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
                                    *chrVSrcPtr, *(chrVSrcPtr+1),
                                    alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                    dest, dstW, lumAlpha, chrAlpha, dstY);
                 } else { //general RGB
-                    c->yuv2packedX(c,
+                    yuv2packedX(c,
                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                    alpSrcPtr, dest, dstW, dstY);
                 }
             }
-        } else { // hmm looks like we can't use MMX here without overwriting this array's tail
-            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                yuv2nv12X_c(c, vLumFilter+dstY*vLumFilterSize,
-                            lumSrcPtr, vLumFilterSize,
-                            vChrFilter+chrDstY*vChrFilterSize,
-                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize, NULL,
-                            dest, uDest, NULL, NULL, dstW, chrDstW);
-            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16_c(c, vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW,
-                                 dstFormat);
-                } else {
-                    yuv2yuvX_c(c, vLumFilter+dstY*vLumFilterSize,
-                               lumSrcPtr, vLumFilterSize,
-                               vChrFilter+chrDstY*vChrFilterSize,
-                               chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                               alpSrcPtr, dest, uDest, vDest, aDest,
-                               dstW, chrDstW);
-                }
-            } else {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
-                if(flags & SWS_FULL_CHR_H_INT) {
-                    yuv2rgbX_c_full(c,
-                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                    alpSrcPtr, dest, dstW, dstY);
-                } else {
-                    yuv2packedX_c(c,
-                                  vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, dest, dstW, dstY);
-                }
-            }
         }
     }
 
@@ -1858,30 +1857,11 @@ static int swScale(SwsContext *c, const uint8_t* src[],
 
 static void sws_init_swScale_c(SwsContext *c)
 {
-    enum PixelFormat srcFormat = c->srcFormat,
-                     dstFormat = c->dstFormat;
+    enum PixelFormat srcFormat = c->srcFormat;
 
-    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-        c->yuv2yuvX     = yuv2nv12X_c;
-    } else if (is16BPS(dstFormat)) {
-        c->yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
-    } else if (is9_OR_10BPS(dstFormat)) {
-        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
-            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
-        } else {
-            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
-        }
-    } else {
-        c->yuv2yuv1     = yuv2yuv1_c;
-        c->yuv2yuvX     = yuv2yuvX_c;
-    }
-    if(c->flags & SWS_FULL_CHR_H_INT) {
-        c->yuv2packedX = yuv2rgbX_c_full;
-    } else {
-        c->yuv2packed1  = yuv2packed1_c;
-        c->yuv2packed2  = yuv2packed2_c;
-        c->yuv2packedX  = yuv2packedX_c;
-    }
+    find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
+                                   &c->yuv2packed1, &c->yuv2packed2,
+                                   &c->yuv2packedX);
 
     c->hScale       = hScale_c;
 

From 2f37321abcbf9be5f58647b17155c0e257949c0d Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Mon, 6 Jun 2011 17:10:11 -0400
Subject: [PATCH 661/830] iirfilter: fix biquad filter coefficients.

The current filter implementation should only have the cx coefficients
divided by gain in order to give the correct output scale.
---
 libavcodec/iirfilter.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/iirfilter.c b/libavcodec/iirfilter.c
index 98366e2c08..a942068aee 100644
--- a/libavcodec/iirfilter.c
+++ b/libavcodec/iirfilter.c
@@ -151,8 +151,6 @@ static int biquad_init_coeffs(void *avc, struct FFIIRFilterCoeffs *c,
     // during filtering, the delay state will include the gain multiplication
     c->cx[0] = lrintf(x0 / c->gain);
     c->cx[1] = lrintf(x1 / c->gain);
-    c->cy[0] /= c->gain;
-    c->cy[1] /= c->gain;
 
     return 0;
 }

From 1929807bef88c3ec3e18434fe82cf04d8f51c7a1 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 7 Jun 2011 18:22:53 +0200
Subject: [PATCH 662/830] libvpxenc: add forgotten AVClass.

Fixes Ticket269

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/libvpxenc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index b55e7559b4..f5c942e0fc 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -48,6 +48,7 @@ struct FrameListData {
 };
 
 typedef struct VP8EncoderContext {
+    AVClass *av_class;
     struct vpx_codec_ctx encoder;
     struct vpx_image rawimg;
     struct vpx_fixed_buf twopass_stats;

From a6703faa157294efb75619f55c06d7ed5a0aa2bf Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 6 Jun 2011 01:14:50 +0200
Subject: [PATCH 663/830] samplefmt: add av_get_bytes_per_sample()

Deprecate av_get_bits_per_sample_fmt(), which was a misnamed function.

For the moment we don't have sample formats with a non-integer number
of bytes; if one is ever added, we may need to create a new
av_get_bits_per_sample() function. In the meantime we prefer to adopt
this variant, since it avoids divisions by 8 all over the place.
---
 doc/APIchanges        |  4 ++++
 libavutil/avutil.h    |  5 ++++-
 libavutil/samplefmt.c |  8 ++++++++
 libavutil/samplefmt.h | 16 +++++++++++++---
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 77eb6d2d27..1133397a43 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-07 - xxxxxxx - lavu 51.4.0 - av_get_bytes_per_sample()
+  Add av_get_bytes_per_sample() in libavutil/samplefmt.h.
+  Deprecate av_get_bits_per_sample_fmt().
+
 2011-06-xx - xxxxxxx - lavu 51.3.0 - opt.h
   Add av_opt_free convenience function.
 
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 5085a6dd0b..8536790109 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,7 +40,7 @@
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR  3
+#define LIBAVUTIL_VERSION_MINOR  4
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
@@ -57,6 +57,9 @@
  * Those FF_API_* defines are not part of public API.
  * They may change, break or disappear at any time.
  */
+#ifndef FF_API_GET_BITS_PER_SAMPLE_FMT
+#define FF_API_GET_BITS_PER_SAMPLE_FMT (LIBAVUTIL_VERSION_MAJOR < 52)
+#endif
 
 /**
  * Return the LIBAVUTIL_VERSION_INT constant.
diff --git a/libavutil/samplefmt.c b/libavutil/samplefmt.c
index 06b50d9bd9..5b0bfa0257 100644
--- a/libavutil/samplefmt.c
+++ b/libavutil/samplefmt.c
@@ -66,8 +66,16 @@ char *av_get_sample_fmt_string (char *buf, int buf_size, enum AVSampleFormat sam
     return buf;
 }
 
+int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt)
+{
+     return sample_fmt < 0 || sample_fmt >= AV_SAMPLE_FMT_NB ?
+        0 : sample_fmt_info[sample_fmt].bits >> 3;
+}
+
+#if FF_API_GET_BITS_PER_SAMPLE_FMT
 int av_get_bits_per_sample_fmt(enum AVSampleFormat sample_fmt)
 {
     return sample_fmt < 0 || sample_fmt >= AV_SAMPLE_FMT_NB ?
         0 : sample_fmt_info[sample_fmt].bits;
 }
+#endif
diff --git a/libavutil/samplefmt.h b/libavutil/samplefmt.h
index 2326f4a11a..e38214927f 100644
--- a/libavutil/samplefmt.h
+++ b/libavutil/samplefmt.h
@@ -19,6 +19,8 @@
 #ifndef AVUTIL_SAMPLEFMT_H
 #define AVUTIL_SAMPLEFMT_H
 
+#include "avutil.h"
+
 /**
  * all in native-endian format
  */
@@ -58,13 +60,21 @@ enum AVSampleFormat av_get_sample_fmt(const char *name);
  */
 char *av_get_sample_fmt_string(char *buf, int buf_size, enum AVSampleFormat sample_fmt);
 
+#if FF_API_GET_BITS_PER_SAMPLE_FMT
 /**
- * Return sample format bits per sample.
+ * @deprecated Use av_get_bytes_per_sample() instead.
+ */
+attribute_deprecated
+int av_get_bits_per_sample_fmt(enum AVSampleFormat sample_fmt);
+#endif
+
+/**
+ * Return number of bytes per sample.
  *
  * @param sample_fmt the sample format
- * @return number of bits per sample or zero if unknown for the given
+ * @return number of bytes per sample or zero if unknown for the given
  * sample format
  */
-int av_get_bits_per_sample_fmt(enum AVSampleFormat sample_fmt);
+int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt);
 
 #endif /* AVUTIL_SAMPLEFMT_H */

From 1397ac0aa0ac98333b9e88d258b625eec99c011f Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 7 Jun 2011 13:04:06 -0400
Subject: [PATCH 664/830] APIchanges: fill-in git commit hash for
 av_get_bytes_per_sample() addition

---
 doc/APIchanges | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 1133397a43..0ce63fc382 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,7 +13,7 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
-2011-06-07 - xxxxxxx - lavu 51.4.0 - av_get_bytes_per_sample()
+2011-06-07 - a6703fa - lavu 51.4.0 - av_get_bytes_per_sample()
   Add av_get_bytes_per_sample() in libavutil/samplefmt.h.
   Deprecate av_get_bits_per_sample_fmt().
 

From 7b20d35a543c6635c348f3b312b5fa24137c0f95 Mon Sep 17 00:00:00 2001
From: John Stebbins <stebbins@jetheaddev.com>
Date: Tue, 7 Jun 2011 19:53:31 +0200
Subject: [PATCH 665/830] vc1: re-initialize tables after width/height change.

read_sequence_header can change width/height; therefore, re-initialize
all tables if width/height changed.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/vc1dec.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 5c931b122a..186610ea95 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -3398,7 +3398,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
     VC1Context *v = avctx->priv_data;
     MpegEncContext *s = &v->s;
     GetBitContext gb;
-    int i;
+    int i, cur_width, cur_height;
 
     if (!avctx->extradata_size || !avctx->extradata) return -1;
     if (!(avctx->flags & CODEC_FLAG_GRAY))
@@ -3419,8 +3419,8 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
     if (vc1_init_common(v) < 0) return -1;
     ff_vc1dsp_init(&v->vc1dsp);
 
-    avctx->coded_width = avctx->width;
-    avctx->coded_height = avctx->height;
+    cur_width = avctx->coded_width = avctx->width;
+    cur_height = avctx->coded_height = avctx->height;
     if (avctx->codec_id == CODEC_ID_WMV3)
     {
         int count = 0;
@@ -3491,6 +3491,19 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
         }
         v->res_sprite = (avctx->codec_tag == MKTAG('W','V','P','2'));
     }
+    // Sequence header information may not have been parsed
+    // yet when ff_msmpeg4_decode_init was called the fist time
+    // above.  If sequence information changes, we need to call
+    // it again.
+    if (cur_width != avctx->width ||
+        cur_height != avctx->height) {
+        MPV_common_end(s);
+        if(ff_msmpeg4_decode_init(avctx) < 0)
+            return -1;
+        avctx->coded_width = avctx->width;
+        avctx->coded_height = avctx->height;
+    }
+
     avctx->profile = v->profile;
     if (v->profile == PROFILE_ADVANCED)
         avctx->level = v->level;

From a8bd53402a6d361b3da7c2c206c999b12ea1e5c6 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 7 Jun 2011 10:53:44 -0400
Subject: [PATCH 666/830] ac3enc: remove unused #define

---
 libavcodec/ac3enc.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 53f6251a5e..4ee3c175d7 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -50,9 +50,6 @@
 #endif
 
 
-/** Maximum number of exponent groups. +1 for separate DC exponent. */
-#define AC3_MAX_EXP_GROUPS 85
-
 #if CONFIG_AC3ENC_FLOAT
 #define MAC_COEF(d,a,b) ((d)+=(a)*(b))
 typedef float SampleType;

From 787a13535a451b802a23abf211a34840c27bbc23 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 7 Jun 2011 11:50:38 -0400
Subject: [PATCH 667/830] ac3enc: remove convenience macro, #define DEBUG

---
 libavcodec/ac3enc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 4ee3c175d7..1ab0c62b91 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -26,7 +26,6 @@
  * The simplest AC-3 encoder.
  */
 
-//#define DEBUG
 //#define ASSERT_LEVEL 2
 
 #include <stdint.h>

From c8e9ea43d004be04e4a9a07736104d099a5bf1f9 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 7 Jun 2011 12:47:09 -0400
Subject: [PATCH 668/830] Move E-AC-3 encoder functions to a separate eac3enc.c
 file.

---
 libavcodec/Makefile  |   3 +-
 libavcodec/ac3enc.c  | 284 +------------------------------------------
 libavcodec/ac3enc.h  | 214 ++++++++++++++++++++++++++++++++
 libavcodec/eac3enc.c | 131 ++++++++++++++++++++
 libavcodec/eac3enc.h |  44 +++++++
 5 files changed, 396 insertions(+), 280 deletions(-)
 create mode 100644 libavcodec/ac3enc.h
 create mode 100644 libavcodec/eac3enc.c
 create mode 100644 libavcodec/eac3enc.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index b772bf185e..7fd6b49d32 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -124,7 +124,8 @@ OBJS-$(CONFIG_DVVIDEO_DECODER)         += dv.o dvdata.o
 OBJS-$(CONFIG_DVVIDEO_ENCODER)         += dv.o dvdata.o
 OBJS-$(CONFIG_DXA_DECODER)             += dxa.o
 OBJS-$(CONFIG_EAC3_DECODER)            += eac3dec.o eac3dec_data.o
-OBJS-$(CONFIG_EAC3_ENCODER)            += ac3enc_float.o ac3tab.o ac3.o kbdwin.o
+OBJS-$(CONFIG_EAC3_ENCODER)            += eac3enc.o ac3enc_float.o ac3tab.o \
+                                          ac3.o kbdwin.o
 OBJS-$(CONFIG_EACMV_DECODER)           += eacmv.o
 OBJS-$(CONFIG_EAMAD_DECODER)           += eamad.o eaidct.o mpeg12.o \
                                           mpeg12data.o mpegvideo.o  \
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 1ab0c62b91..9e011af094 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -42,183 +42,8 @@
 #include "ac3.h"
 #include "audioconvert.h"
 #include "fft.h"
-
-
-#ifndef CONFIG_AC3ENC_FLOAT
-#define CONFIG_AC3ENC_FLOAT 0
-#endif
-
-
-#if CONFIG_AC3ENC_FLOAT
-#define MAC_COEF(d,a,b) ((d)+=(a)*(b))
-typedef float SampleType;
-typedef float CoefType;
-typedef float CoefSumType;
-#else
-#define MAC_COEF(d,a,b) MAC64(d,a,b)
-typedef int16_t SampleType;
-typedef int32_t CoefType;
-typedef int64_t CoefSumType;
-#endif
-
-typedef struct AC3MDCTContext {
-    const SampleType *window;           ///< MDCT window function
-    FFTContext fft;                     ///< FFT context for MDCT calculation
-} AC3MDCTContext;
-
-/**
- * Encoding Options used by AVOption.
- */
-typedef struct AC3EncOptions {
-    /* AC-3 metadata options*/
-    int dialogue_level;
-    int bitstream_mode;
-    float center_mix_level;
-    float surround_mix_level;
-    int dolby_surround_mode;
-    int audio_production_info;
-    int mixing_level;
-    int room_type;
-    int copyright;
-    int original;
-    int extended_bsi_1;
-    int preferred_stereo_downmix;
-    float ltrt_center_mix_level;
-    float ltrt_surround_mix_level;
-    float loro_center_mix_level;
-    float loro_surround_mix_level;
-    int extended_bsi_2;
-    int dolby_surround_ex_mode;
-    int dolby_headphone_mode;
-    int ad_converter_type;
-
-    /* other encoding options */
-    int allow_per_frame_metadata;
-    int stereo_rematrixing;
-    int channel_coupling;
-    int cpl_start;
-} AC3EncOptions;
-
-/**
- * Data for a single audio block.
- */
-typedef struct AC3Block {
-    CoefType **mdct_coef;                       ///< MDCT coefficients
-    int32_t  **fixed_coef;                      ///< fixed-point MDCT coefficients
-    uint8_t  **exp;                             ///< original exponents
-    uint8_t  **grouped_exp;                     ///< grouped exponents
-    int16_t  **psd;                             ///< psd per frequency bin
-    int16_t  **band_psd;                        ///< psd per critical band
-    int16_t  **mask;                            ///< masking curve
-    uint16_t **qmant;                           ///< quantized mantissas
-    uint8_t  **cpl_coord_exp;                   ///< coupling coord exponents           (cplcoexp)
-    uint8_t  **cpl_coord_mant;                  ///< coupling coord mantissas           (cplcomant)
-    uint8_t  coeff_shift[AC3_MAX_CHANNELS];     ///< fixed-point coefficient shift values
-    uint8_t  new_rematrixing_strategy;          ///< send new rematrixing flags in this block
-    int      num_rematrixing_bands;             ///< number of rematrixing bands
-    uint8_t  rematrixing_flags[4];              ///< rematrixing flags
-    int      new_cpl_strategy;                  ///< send new coupling strategy
-    int      cpl_in_use;                        ///< coupling in use for this block     (cplinu)
-    uint8_t  channel_in_cpl[AC3_MAX_CHANNELS];  ///< channel in coupling                (chincpl)
-    int      num_cpl_channels;                  ///< number of channels in coupling
-    uint8_t  new_cpl_coords;                    ///< send new coupling coordinates      (cplcoe)
-    uint8_t  cpl_master_exp[AC3_MAX_CHANNELS];  ///< coupling coord master exponents    (mstrcplco)
-    int      new_snr_offsets;                   ///< send new SNR offsets
-    int      new_cpl_leak;                      ///< send new coupling leak info
-    int      end_freq[AC3_MAX_CHANNELS];        ///< end frequency bin                  (endmant)
-} AC3Block;
-
-/**
- * AC-3 encoder private context.
- */
-typedef struct AC3EncodeContext {
-    AVClass *av_class;                      ///< AVClass used for AVOption
-    AC3EncOptions options;                  ///< encoding options
-    PutBitContext pb;                       ///< bitstream writer context
-    DSPContext dsp;
-    AC3DSPContext ac3dsp;                   ///< AC-3 optimized functions
-    AC3MDCTContext mdct;                    ///< MDCT context
-
-    AC3Block blocks[AC3_MAX_BLOCKS];        ///< per-block info
-
-    int eac3;                               ///< indicates if this is E-AC-3 vs. AC-3
-    int bitstream_id;                       ///< bitstream id                           (bsid)
-    int bitstream_mode;                     ///< bitstream mode                         (bsmod)
-
-    int bit_rate;                           ///< target bit rate, in bits-per-second
-    int sample_rate;                        ///< sampling frequency, in Hz
-
-    int frame_size_min;                     ///< minimum frame size in case rounding is necessary
-    int frame_size;                         ///< current frame size in bytes
-    int frame_size_code;                    ///< frame size code                        (frmsizecod)
-    uint16_t crc_inv[2];
-    int64_t bits_written;                   ///< bit count    (used to avg. bitrate)
-    int64_t samples_written;                ///< sample count (used to avg. bitrate)
-
-    int fbw_channels;                       ///< number of full-bandwidth channels      (nfchans)
-    int channels;                           ///< total number of channels               (nchans)
-    int lfe_on;                             ///< indicates if there is an LFE channel   (lfeon)
-    int lfe_channel;                        ///< channel index of the LFE channel
-    int has_center;                         ///< indicates if there is a center channel
-    int has_surround;                       ///< indicates if there are one or more surround channels
-    int channel_mode;                       ///< channel mode                           (acmod)
-    const uint8_t *channel_map;             ///< channel map used to reorder channels
-
-    int center_mix_level;                   ///< center mix level code
-    int surround_mix_level;                 ///< surround mix level code
-    int ltrt_center_mix_level;              ///< Lt/Rt center mix level code
-    int ltrt_surround_mix_level;            ///< Lt/Rt surround mix level code
-    int loro_center_mix_level;              ///< Lo/Ro center mix level code
-    int loro_surround_mix_level;            ///< Lo/Ro surround mix level code
-
-    int cutoff;                             ///< user-specified cutoff frequency, in Hz
-    int bandwidth_code;                     ///< bandwidth code (0 to 60)               (chbwcod)
-    int start_freq[AC3_MAX_CHANNELS];       ///< start frequency bin                    (strtmant)
-    int cpl_end_freq;                       ///< coupling channel end frequency bin
-
-    int cpl_on;                             ///< coupling turned on for this frame
-    int cpl_enabled;                        ///< coupling enabled for all frames
-    int num_cpl_subbands;                   ///< number of coupling subbands            (ncplsubnd)
-    int num_cpl_bands;                      ///< number of coupling bands               (ncplbnd)
-    uint8_t cpl_band_sizes[AC3_MAX_CPL_BANDS];  ///< number of coeffs in each coupling band
-
-    int rematrixing_enabled;                ///< stereo rematrixing enabled
-
-    /* bitrate allocation control */
-    int slow_gain_code;                     ///< slow gain code                         (sgaincod)
-    int slow_decay_code;                    ///< slow decay code                        (sdcycod)
-    int fast_decay_code;                    ///< fast decay code                        (fdcycod)
-    int db_per_bit_code;                    ///< dB/bit code                            (dbpbcod)
-    int floor_code;                         ///< floor code                             (floorcod)
-    AC3BitAllocParameters bit_alloc;        ///< bit allocation parameters
-    int coarse_snr_offset;                  ///< coarse SNR offsets                     (csnroffst)
-    int fast_gain_code[AC3_MAX_CHANNELS];   ///< fast gain codes (signal-to-mask ratio) (fgaincod)
-    int fine_snr_offset[AC3_MAX_CHANNELS];  ///< fine SNR offsets                       (fsnroffst)
-    int frame_bits_fixed;                   ///< number of non-coefficient bits for fixed parameters
-    int frame_bits;                         ///< all frame bits except exponents and mantissas
-    int exponent_bits;                      ///< number of bits used for exponents
-
-    SampleType **planar_samples;
-    uint8_t *bap_buffer;
-    uint8_t *bap1_buffer;
-    CoefType *mdct_coef_buffer;
-    int32_t *fixed_coef_buffer;
-    uint8_t *exp_buffer;
-    uint8_t *grouped_exp_buffer;
-    int16_t *psd_buffer;
-    int16_t *band_psd_buffer;
-    int16_t *mask_buffer;
-    uint16_t *qmant_buffer;
-    uint8_t *cpl_coord_exp_buffer;
-    uint8_t *cpl_coord_mant_buffer;
-
-    uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies
-    uint8_t exp_ref_block[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< reference blocks for EXP_REUSE
-    uint8_t *ref_bap     [AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< bit allocation pointers (bap)
-    int ref_bap_set;                                         ///< indicates if ref_bap pointers have been set
-
-    DECLARE_ALIGNED(32, SampleType, windowed_samples)[AC3_WINDOW_SIZE];
-} AC3EncodeContext;
+#include "ac3enc.h"
+#include "eac3enc.h"
 
 typedef struct AC3Mant {
     uint16_t *qmant1_ptr, *qmant2_ptr, *qmant4_ptr; ///< mantissa pointers for bap=1,2,4
@@ -741,34 +566,8 @@ static void apply_channel_coupling(AC3EncodeContext *s)
         }
     }
 
-    if (s->eac3) {
-        /* set first cpl coords */
-        int first_cpl_coords[AC3_MAX_CHANNELS];
-        for (ch = 1; ch <= s->fbw_channels; ch++)
-            first_cpl_coords[ch] = 1;
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-            AC3Block *block = &s->blocks[blk];
-            for (ch = 1; ch <= s->fbw_channels; ch++) {
-                if (block->channel_in_cpl[ch]) {
-                    if (first_cpl_coords[ch]) {
-                        block->new_cpl_coords = 2;
-                        first_cpl_coords[ch]  = 0;
-                    }
-                } else {
-                    first_cpl_coords[ch] = 1;
-                }
-            }
-        }
-
-        /* set first cpl leak */
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-            AC3Block *block = &s->blocks[blk];
-            if (block->cpl_in_use) {
-                block->new_cpl_leak = 2;
-                break;
-            }
-        }
-    }
+    if (s->eac3)
+        ff_eac3_set_cpl_states(s);
 #endif /* CONFIG_AC3ENC_FLOAT */
 }
 
@@ -1905,79 +1704,6 @@ static void ac3_output_frame_header(AC3EncodeContext *s)
 }
 
 
-/**
- * Write the E-AC-3 frame header to the output bitstream.
- */
-static void eac3_output_frame_header(AC3EncodeContext *s)
-{
-    int blk, ch;
-    AC3EncOptions *opt = &s->options;
-
-    put_bits(&s->pb, 16, 0x0b77);                   /* sync word */
-
-    /* BSI header */
-    put_bits(&s->pb,  2, 0);                        /* stream type = independent */
-    put_bits(&s->pb,  3, 0);                        /* substream id = 0 */
-    put_bits(&s->pb, 11, (s->frame_size / 2) - 1);  /* frame size */
-    if (s->bit_alloc.sr_shift) {
-        put_bits(&s->pb, 2, 0x3);                   /* fscod2 */
-        put_bits(&s->pb, 2, s->bit_alloc.sr_code);  /* sample rate code */
-    } else {
-        put_bits(&s->pb, 2, s->bit_alloc.sr_code);  /* sample rate code */
-        put_bits(&s->pb, 2, 0x3);                   /* number of blocks = 6 */
-    }
-    put_bits(&s->pb, 3, s->channel_mode);           /* audio coding mode */
-    put_bits(&s->pb, 1, s->lfe_on);                 /* LFE channel indicator */
-    put_bits(&s->pb, 5, s->bitstream_id);           /* bitstream id (EAC3=16) */
-    put_bits(&s->pb, 5, -opt->dialogue_level);      /* dialogue normalization level */
-    put_bits(&s->pb, 1, 0);                         /* no compression gain */
-    put_bits(&s->pb, 1, 0);                         /* no mixing metadata */
-    /* TODO: mixing metadata */
-    put_bits(&s->pb, 1, 0);                         /* no info metadata */
-    /* TODO: info metadata */
-    put_bits(&s->pb, 1, 0);                         /* no additional bit stream info */
-
-    /* frame header */
-    put_bits(&s->pb, 1, 1);                         /* exponent strategy syntax = each block */
-    put_bits(&s->pb, 1, 0);                         /* aht enabled = no */
-    put_bits(&s->pb, 2, 0);                         /* snr offset strategy = 1 */
-    put_bits(&s->pb, 1, 0);                         /* transient pre-noise processing enabled = no */
-    put_bits(&s->pb, 1, 0);                         /* block switch syntax enabled = no */
-    put_bits(&s->pb, 1, 0);                         /* dither flag syntax enabled = no */
-    put_bits(&s->pb, 1, 0);                         /* bit allocation model syntax enabled = no */
-    put_bits(&s->pb, 1, 0);                         /* fast gain codes enabled = no */
-    put_bits(&s->pb, 1, 0);                         /* dba syntax enabled = no */
-    put_bits(&s->pb, 1, 0);                         /* skip field syntax enabled = no */
-    put_bits(&s->pb, 1, 0);                         /* spx enabled = no */
-    /* coupling strategy use flags */
-    if (s->channel_mode > AC3_CHMODE_MONO) {
-        put_bits(&s->pb, 1, s->blocks[0].cpl_in_use);
-        for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
-            AC3Block *block = &s->blocks[blk];
-            put_bits(&s->pb, 1, block->new_cpl_strategy);
-            if (block->new_cpl_strategy)
-                put_bits(&s->pb, 1, block->cpl_in_use);
-        }
-    }
-    /* exponent strategy */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
-        for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++)
-            put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
-    if (s->lfe_on) {
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
-            put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
-    }
-    /* E-AC-3 to AC-3 converter exponent strategy (unfortunately not optional...) */
-    for (ch = 1; ch <= s->fbw_channels; ch++)
-        put_bits(&s->pb, 5, 0);
-    /* snr offsets */
-    put_bits(&s->pb, 6, s->coarse_snr_offset);
-    put_bits(&s->pb, 4, s->fine_snr_offset[1]);
-    /* block start info */
-    put_bits(&s->pb, 1, 0);
-}
-
-
 /**
  * Write one audio block to the output bitstream.
  */
@@ -2262,7 +1988,7 @@ static void output_frame(AC3EncodeContext *s, unsigned char *frame)
     init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE);
 
     if (s->eac3)
-        eac3_output_frame_header(s);
+        ff_eac3_output_frame_header(s);
     else
         ac3_output_frame_header(s);
 
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
new file mode 100644
index 0000000000..0541683537
--- /dev/null
+++ b/libavcodec/ac3enc.h
@@ -0,0 +1,214 @@
+/*
+ * AC-3 encoder & E-AC-3 encoder common header
+ * Copyright (c) 2000 Fabrice Bellard
+ * Copyright (c) 2006-2010 Justin Ruggles <justin.ruggles@gmail.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AC-3 encoder & E-AC-3 encoder common header
+ */
+
+#ifndef AVCODEC_AC3ENC_H
+#define AVCODEC_AC3ENC_H
+
+#include <stdint.h>
+#include "ac3.h"
+#include "ac3dsp.h"
+#include "avcodec.h"
+#include "dsputil.h"
+#include "put_bits.h"
+#include "fft.h"
+
+#ifndef CONFIG_AC3ENC_FLOAT
+#define CONFIG_AC3ENC_FLOAT 0
+#endif
+
+#if CONFIG_AC3ENC_FLOAT
+#define MAC_COEF(d,a,b) ((d)+=(a)*(b))
+typedef float SampleType;
+typedef float CoefType;
+typedef float CoefSumType;
+#else
+#define MAC_COEF(d,a,b) MAC64(d,a,b)
+typedef int16_t SampleType;
+typedef int32_t CoefType;
+typedef int64_t CoefSumType;
+#endif
+
+typedef struct AC3MDCTContext {
+    const SampleType *window;           ///< MDCT window function
+    FFTContext fft;                     ///< FFT context for MDCT calculation
+} AC3MDCTContext;
+
+/**
+ * Encoding Options used by AVOption.
+ */
+typedef struct AC3EncOptions {
+    /* AC-3 metadata options*/
+    int dialogue_level;
+    int bitstream_mode;
+    float center_mix_level;
+    float surround_mix_level;
+    int dolby_surround_mode;
+    int audio_production_info;
+    int mixing_level;
+    int room_type;
+    int copyright;
+    int original;
+    int extended_bsi_1;
+    int preferred_stereo_downmix;
+    float ltrt_center_mix_level;
+    float ltrt_surround_mix_level;
+    float loro_center_mix_level;
+    float loro_surround_mix_level;
+    int extended_bsi_2;
+    int dolby_surround_ex_mode;
+    int dolby_headphone_mode;
+    int ad_converter_type;
+
+    /* other encoding options */
+    int allow_per_frame_metadata;
+    int stereo_rematrixing;
+    int channel_coupling;
+    int cpl_start;
+} AC3EncOptions;
+
+/**
+ * Data for a single audio block.
+ */
+typedef struct AC3Block {
+    CoefType **mdct_coef;                       ///< MDCT coefficients
+    int32_t  **fixed_coef;                      ///< fixed-point MDCT coefficients
+    uint8_t  **exp;                             ///< original exponents
+    uint8_t  **grouped_exp;                     ///< grouped exponents
+    int16_t  **psd;                             ///< psd per frequency bin
+    int16_t  **band_psd;                        ///< psd per critical band
+    int16_t  **mask;                            ///< masking curve
+    uint16_t **qmant;                           ///< quantized mantissas
+    uint8_t  **cpl_coord_exp;                   ///< coupling coord exponents           (cplcoexp)
+    uint8_t  **cpl_coord_mant;                  ///< coupling coord mantissas           (cplcomant)
+    uint8_t  coeff_shift[AC3_MAX_CHANNELS];     ///< fixed-point coefficient shift values
+    uint8_t  new_rematrixing_strategy;          ///< send new rematrixing flags in this block
+    int      num_rematrixing_bands;             ///< number of rematrixing bands
+    uint8_t  rematrixing_flags[4];              ///< rematrixing flags
+    int      new_cpl_strategy;                  ///< send new coupling strategy
+    int      cpl_in_use;                        ///< coupling in use for this block     (cplinu)
+    uint8_t  channel_in_cpl[AC3_MAX_CHANNELS];  ///< channel in coupling                (chincpl)
+    int      num_cpl_channels;                  ///< number of channels in coupling
+    uint8_t  new_cpl_coords;                    ///< send new coupling coordinates      (cplcoe)
+    uint8_t  cpl_master_exp[AC3_MAX_CHANNELS];  ///< coupling coord master exponents    (mstrcplco)
+    int      new_snr_offsets;                   ///< send new SNR offsets
+    int      new_cpl_leak;                      ///< send new coupling leak info
+    int      end_freq[AC3_MAX_CHANNELS];        ///< end frequency bin                  (endmant)
+} AC3Block;
+
+/**
+ * AC-3 encoder private context.
+ */
+typedef struct AC3EncodeContext {
+    AVClass *av_class;                      ///< AVClass used for AVOption
+    AC3EncOptions options;                  ///< encoding options
+    PutBitContext pb;                       ///< bitstream writer context
+    DSPContext dsp;
+    AC3DSPContext ac3dsp;                   ///< AC-3 optimized functions
+    AC3MDCTContext mdct;                    ///< MDCT context
+
+    AC3Block blocks[AC3_MAX_BLOCKS];        ///< per-block info
+
+    int eac3;                               ///< indicates if this is E-AC-3 vs. AC-3
+    int bitstream_id;                       ///< bitstream id                           (bsid)
+    int bitstream_mode;                     ///< bitstream mode                         (bsmod)
+
+    int bit_rate;                           ///< target bit rate, in bits-per-second
+    int sample_rate;                        ///< sampling frequency, in Hz
+
+    int frame_size_min;                     ///< minimum frame size in case rounding is necessary
+    int frame_size;                         ///< current frame size in bytes
+    int frame_size_code;                    ///< frame size code                        (frmsizecod)
+    uint16_t crc_inv[2];
+    int64_t bits_written;                   ///< bit count    (used to avg. bitrate)
+    int64_t samples_written;                ///< sample count (used to avg. bitrate)
+
+    int fbw_channels;                       ///< number of full-bandwidth channels      (nfchans)
+    int channels;                           ///< total number of channels               (nchans)
+    int lfe_on;                             ///< indicates if there is an LFE channel   (lfeon)
+    int lfe_channel;                        ///< channel index of the LFE channel
+    int has_center;                         ///< indicates if there is a center channel
+    int has_surround;                       ///< indicates if there are one or more surround channels
+    int channel_mode;                       ///< channel mode                           (acmod)
+    const uint8_t *channel_map;             ///< channel map used to reorder channels
+
+    int center_mix_level;                   ///< center mix level code
+    int surround_mix_level;                 ///< surround mix level code
+    int ltrt_center_mix_level;              ///< Lt/Rt center mix level code
+    int ltrt_surround_mix_level;            ///< Lt/Rt surround mix level code
+    int loro_center_mix_level;              ///< Lo/Ro center mix level code
+    int loro_surround_mix_level;            ///< Lo/Ro surround mix level code
+
+    int cutoff;                             ///< user-specified cutoff frequency, in Hz
+    int bandwidth_code;                     ///< bandwidth code (0 to 60)               (chbwcod)
+    int start_freq[AC3_MAX_CHANNELS];       ///< start frequency bin                    (strtmant)
+    int cpl_end_freq;                       ///< coupling channel end frequency bin
+
+    int cpl_on;                             ///< coupling turned on for this frame
+    int cpl_enabled;                        ///< coupling enabled for all frames
+    int num_cpl_subbands;                   ///< number of coupling subbands            (ncplsubnd)
+    int num_cpl_bands;                      ///< number of coupling bands               (ncplbnd)
+    uint8_t cpl_band_sizes[AC3_MAX_CPL_BANDS];  ///< number of coeffs in each coupling band
+
+    int rematrixing_enabled;                ///< stereo rematrixing enabled
+
+    /* bitrate allocation control */
+    int slow_gain_code;                     ///< slow gain code                         (sgaincod)
+    int slow_decay_code;                    ///< slow decay code                        (sdcycod)
+    int fast_decay_code;                    ///< fast decay code                        (fdcycod)
+    int db_per_bit_code;                    ///< dB/bit code                            (dbpbcod)
+    int floor_code;                         ///< floor code                             (floorcod)
+    AC3BitAllocParameters bit_alloc;        ///< bit allocation parameters
+    int coarse_snr_offset;                  ///< coarse SNR offsets                     (csnroffst)
+    int fast_gain_code[AC3_MAX_CHANNELS];   ///< fast gain codes (signal-to-mask ratio) (fgaincod)
+    int fine_snr_offset[AC3_MAX_CHANNELS];  ///< fine SNR offsets                       (fsnroffst)
+    int frame_bits_fixed;                   ///< number of non-coefficient bits for fixed parameters
+    int frame_bits;                         ///< all frame bits except exponents and mantissas
+    int exponent_bits;                      ///< number of bits used for exponents
+
+    SampleType **planar_samples;
+    uint8_t *bap_buffer;
+    uint8_t *bap1_buffer;
+    CoefType *mdct_coef_buffer;
+    int32_t *fixed_coef_buffer;
+    uint8_t *exp_buffer;
+    uint8_t *grouped_exp_buffer;
+    int16_t *psd_buffer;
+    int16_t *band_psd_buffer;
+    int16_t *mask_buffer;
+    uint16_t *qmant_buffer;
+    uint8_t *cpl_coord_exp_buffer;
+    uint8_t *cpl_coord_mant_buffer;
+
+    uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies
+    uint8_t exp_ref_block[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< reference blocks for EXP_REUSE
+    uint8_t *ref_bap     [AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< bit allocation pointers (bap)
+    int ref_bap_set;                                         ///< indicates if ref_bap pointers have been set
+
+    DECLARE_ALIGNED(32, SampleType, windowed_samples)[AC3_WINDOW_SIZE];
+} AC3EncodeContext;
+
+#endif /* AVCODEC_AC3ENC_H */
diff --git a/libavcodec/eac3enc.c b/libavcodec/eac3enc.c
new file mode 100644
index 0000000000..20f4b879c6
--- /dev/null
+++ b/libavcodec/eac3enc.c
@@ -0,0 +1,131 @@
+/*
+ * E-AC-3 encoder
+ * Copyright (c) 2011 Justin Ruggles <justin.ruggles@gmail.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * E-AC-3 encoder
+ */
+
+#define CONFIG_AC3ENC_FLOAT 1
+#include "ac3enc.h"
+#include "eac3enc.h"
+
+void ff_eac3_set_cpl_states(AC3EncodeContext *s)
+{
+    int ch, blk;
+    int first_cpl_coords[AC3_MAX_CHANNELS];
+
+    /* set first cpl coords */
+    for (ch = 1; ch <= s->fbw_channels; ch++)
+        first_cpl_coords[ch] = 1;
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        for (ch = 1; ch <= s->fbw_channels; ch++) {
+            if (block->channel_in_cpl[ch]) {
+                if (first_cpl_coords[ch]) {
+                    block->new_cpl_coords = 2;
+                    first_cpl_coords[ch]  = 0;
+                }
+            } else {
+                first_cpl_coords[ch] = 1;
+            }
+        }
+    }
+
+    /* set first cpl leak */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        if (block->cpl_in_use) {
+            block->new_cpl_leak = 2;
+            break;
+        }
+    }
+}
+
+
+void ff_eac3_output_frame_header(AC3EncodeContext *s)
+{
+    int blk, ch;
+    AC3EncOptions *opt = &s->options;
+
+    put_bits(&s->pb, 16, 0x0b77);                   /* sync word */
+
+    /* BSI header */
+    put_bits(&s->pb,  2, 0);                        /* stream type = independent */
+    put_bits(&s->pb,  3, 0);                        /* substream id = 0 */
+    put_bits(&s->pb, 11, (s->frame_size / 2) - 1);  /* frame size */
+    if (s->bit_alloc.sr_shift) {
+        put_bits(&s->pb, 2, 0x3);                   /* fscod2 */
+        put_bits(&s->pb, 2, s->bit_alloc.sr_code);  /* sample rate code */
+    } else {
+        put_bits(&s->pb, 2, s->bit_alloc.sr_code);  /* sample rate code */
+        put_bits(&s->pb, 2, 0x3);                   /* number of blocks = 6 */
+    }
+    put_bits(&s->pb, 3, s->channel_mode);           /* audio coding mode */
+    put_bits(&s->pb, 1, s->lfe_on);                 /* LFE channel indicator */
+    put_bits(&s->pb, 5, s->bitstream_id);           /* bitstream id (EAC3=16) */
+    put_bits(&s->pb, 5, -opt->dialogue_level);      /* dialogue normalization level */
+    put_bits(&s->pb, 1, 0);                         /* no compression gain */
+    put_bits(&s->pb, 1, 0);                         /* no mixing metadata */
+    /* TODO: mixing metadata */
+    put_bits(&s->pb, 1, 0);                         /* no info metadata */
+    /* TODO: info metadata */
+    put_bits(&s->pb, 1, 0);                         /* no additional bit stream info */
+
+    /* frame header */
+    put_bits(&s->pb, 1, 1);                         /* exponent strategy syntax = each block */
+    put_bits(&s->pb, 1, 0);                         /* aht enabled = no */
+    put_bits(&s->pb, 2, 0);                         /* snr offset strategy = 1 */
+    put_bits(&s->pb, 1, 0);                         /* transient pre-noise processing enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* block switch syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* dither flag syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* bit allocation model syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* fast gain codes enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* dba syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* skip field syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* spx enabled = no */
+    /* coupling strategy use flags */
+    if (s->channel_mode > AC3_CHMODE_MONO) {
+        put_bits(&s->pb, 1, s->blocks[0].cpl_in_use);
+        for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
+            AC3Block *block = &s->blocks[blk];
+            put_bits(&s->pb, 1, block->new_cpl_strategy);
+            if (block->new_cpl_strategy)
+                put_bits(&s->pb, 1, block->cpl_in_use);
+        }
+    }
+    /* exponent strategy */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+        for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++)
+            put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
+    if (s->lfe_on) {
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
+    }
+    /* E-AC-3 to AC-3 converter exponent strategy (unfortunately not optional...) */
+    for (ch = 1; ch <= s->fbw_channels; ch++)
+        put_bits(&s->pb, 5, 0);
+    /* snr offsets */
+    put_bits(&s->pb, 6, s->coarse_snr_offset);
+    put_bits(&s->pb, 4, s->fine_snr_offset[1]);
+    /* block start info */
+    put_bits(&s->pb, 1, 0);
+}
diff --git a/libavcodec/eac3enc.h b/libavcodec/eac3enc.h
new file mode 100644
index 0000000000..eacb8cf164
--- /dev/null
+++ b/libavcodec/eac3enc.h
@@ -0,0 +1,44 @@
+/*
+ * E-AC-3 encoder
+ * Copyright (c) 2011 Justin Ruggles <justin.ruggles@gmail.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * E-AC-3 encoder
+ */
+
+#ifndef AVCODEC_EAC3ENC_H
+#define AVCODEC_EAC3ENC_H
+
+#include "ac3enc.h"
+
+/**
+ * Set coupling states.
+ * This determines whether certain flags must be written to the bitstream or
+ * whether they will be implicitly already known by the decoder.
+ */
+void ff_eac3_set_cpl_states(AC3EncodeContext *s);
+
+/**
+ * Write the E-AC-3 frame header to the output bitstream.
+ */
+void ff_eac3_output_frame_header(AC3EncodeContext *s);
+
+#endif /* AVCODEC_EAC3ENC_H */

From d3778972d854b72685b10e0b4afedcc1af6f5433 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 8 Jun 2011 00:06:19 +0200
Subject: [PATCH 669/830] ac3enc: Fix linking of AC-3 encoder without the
 E-AC-3 encoder.

The AC-3 encoder unconditionally references some symbols from the E-AC-3
encoder; make those references conditional to fix linking.
---
 libavcodec/ac3enc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 9e011af094..e8ccde514a 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -566,7 +566,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
         }
     }
 
-    if (s->eac3)
+    if (CONFIG_EAC3_ENCODER && s->eac3)
         ff_eac3_set_cpl_states(s);
 #endif /* CONFIG_AC3ENC_FLOAT */
 }
@@ -1987,7 +1987,7 @@ static void output_frame(AC3EncodeContext *s, unsigned char *frame)
 
     init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE);
 
-    if (s->eac3)
+    if (CONFIG_EAC3_ENCODER && s->eac3)
         ff_eac3_output_frame_header(s);
     else
         ac3_output_frame_header(s);

From 4de83b7b6d3ba321877f69621ef0de497384060c Mon Sep 17 00:00:00 2001
From: Daniel Kang <daniel.d.kang@gmail.com>
Date: Tue, 7 Jun 2011 10:31:51 -0400
Subject: [PATCH 670/830] H264: x86 predict init cosmetics.

Change indentation and whitespace; also move HAVE_YASM blocks.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/x86/h264_intrapred_init.c | 220 +++++++++++++--------------
 1 file changed, 110 insertions(+), 110 deletions(-)

diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index da5553571a..9d6726c31c 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -124,132 +124,132 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
 {
     int mm_flags = av_get_cpu_flags();
 
+#if HAVE_YASM
     if (bit_depth == 8) {
-#if HAVE_YASM
-    if (mm_flags & AV_CPU_FLAG_MMX) {
-        h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_mmx;
-        h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
-        h->pred8x8  [VERT_PRED8x8] = ff_pred8x8_vertical_mmx;
-        h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx;
-        if (codec_id == CODEC_ID_VP8) {
-            h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
-            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
-            h->pred4x4  [TM_VP8_PRED  ] = ff_pred4x4_tm_vp8_mmx;
-        } else {
-            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx;
-            if (codec_id == CODEC_ID_SVQ3) {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx;
-            } else if (codec_id == CODEC_ID_RV40) {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx;
+        if (mm_flags & AV_CPU_FLAG_MMX) {
+            h->pred16x16[VERT_PRED8x8         ] = ff_pred16x16_vertical_mmx;
+            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_mmx;
+            h->pred8x8  [VERT_PRED8x8         ] = ff_pred8x8_vertical_mmx;
+            h->pred8x8  [HOR_PRED8x8          ] = ff_pred8x8_horizontal_mmx;
+            if (codec_id == CODEC_ID_VP8) {
+                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_mmx;
+                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_mmx;
+                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_mmx;
             } else {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_mmx;
+                h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx;
+                if (codec_id == CODEC_ID_SVQ3) {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx;
+                } else if (codec_id == CODEC_ID_RV40) {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx;
+                } else {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_mmx;
+                }
             }
         }
-    }
 
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
-        h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
-        h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_mmxext;
-        h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
-        h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
-        h->pred8x8l [DC_PRED     ] = ff_pred8x8l_dc_mmxext;
-        h->pred8x8l [HOR_PRED    ] = ff_pred8x8l_horizontal_mmxext;
-        h->pred8x8l [VERT_PRED   ] = ff_pred8x8l_vertical_mmxext;
-        h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_mmxext;
-        h->pred8x8l [VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_mmxext;
-        h->pred8x8l [HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_mmxext;
-        h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_mmxext;
-        h->pred8x8l [HOR_DOWN_PRED       ] = ff_pred8x8l_horizontal_down_mmxext;
-        h->pred4x4  [DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_mmxext;
-        h->pred4x4  [VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_mmxext;
-        h->pred4x4  [HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_mmxext;
-        h->pred4x4  [DC_PRED     ] = ff_pred4x4_dc_mmxext;
-        if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
-            h->pred4x4  [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
-        if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264)
-            h->pred4x4  [VERT_LEFT_PRED      ] = ff_pred4x4_vertical_left_mmxext;
-        if (codec_id != CODEC_ID_RV40) {
-            h->pred4x4  [HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_mmxext;
-        }
-        if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
-            h->pred8x8  [TOP_DC_PRED8x8      ] = ff_pred8x8_top_dc_mmxext;
-            h->pred8x8  [DC_PRED8x8          ] = ff_pred8x8_dc_mmxext;
-        }
-        if (codec_id == CODEC_ID_VP8) {
-            h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
-            h->pred8x8  [DC_PRED8x8   ] = ff_pred8x8_dc_rv40_mmxext;
-            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmxext;
-            h->pred4x4  [TM_VP8_PRED  ] = ff_pred4x4_tm_vp8_mmxext;
-            h->pred4x4  [VERT_PRED    ] = ff_pred4x4_vertical_vp8_mmxext;
-        } else {
-            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2;
-            if (codec_id == CODEC_ID_SVQ3) {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx2;
-            } else if (codec_id == CODEC_ID_RV40) {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx2;
+        if (mm_flags & AV_CPU_FLAG_MMX2) {
+            h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_mmxext;
+            h->pred16x16[DC_PRED8x8             ] = ff_pred16x16_dc_mmxext;
+            h->pred8x8  [HOR_PRED8x8            ] = ff_pred8x8_horizontal_mmxext;
+            h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_mmxext;
+            h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_mmxext;
+            h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_mmxext;
+            h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_mmxext;
+            h->pred8x8l [DIAG_DOWN_RIGHT_PRED   ] = ff_pred8x8l_down_right_mmxext;
+            h->pred8x8l [VERT_RIGHT_PRED        ] = ff_pred8x8l_vertical_right_mmxext;
+            h->pred8x8l [HOR_UP_PRED            ] = ff_pred8x8l_horizontal_up_mmxext;
+            h->pred8x8l [DIAG_DOWN_LEFT_PRED    ] = ff_pred8x8l_down_left_mmxext;
+            h->pred8x8l [HOR_DOWN_PRED          ] = ff_pred8x8l_horizontal_down_mmxext;
+            h->pred4x4  [DIAG_DOWN_RIGHT_PRED   ] = ff_pred4x4_down_right_mmxext;
+            h->pred4x4  [VERT_RIGHT_PRED        ] = ff_pred4x4_vertical_right_mmxext;
+            h->pred4x4  [HOR_DOWN_PRED          ] = ff_pred4x4_horizontal_down_mmxext;
+            h->pred4x4  [DC_PRED                ] = ff_pred4x4_dc_mmxext;
+            if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264) {
+                h->pred4x4  [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_mmxext;
+            }
+            if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
+                h->pred4x4  [VERT_LEFT_PRED     ] = ff_pred4x4_vertical_left_mmxext;
+            }
+            if (codec_id != CODEC_ID_RV40) {
+                h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_mmxext;
+            }
+            if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
+                h->pred8x8  [TOP_DC_PRED8x8     ] = ff_pred8x8_top_dc_mmxext;
+                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_mmxext;
+            }
+            if (codec_id == CODEC_ID_VP8) {
+                h->pred16x16[PLANE_PRED8x8      ] = ff_pred16x16_tm_vp8_mmxext;
+                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_mmxext;
+                h->pred8x8  [PLANE_PRED8x8      ] = ff_pred8x8_tm_vp8_mmxext;
+                h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_mmxext;
+                h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_mmxext;
             } else {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_mmx2;
+                h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2;
+                if (codec_id == CODEC_ID_SVQ3) {
+                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_mmx2;
+                } else if (codec_id == CODEC_ID_RV40) {
+                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_mmx2;
+                } else {
+                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_mmx2;
+                }
             }
         }
-    }
 
-    if (mm_flags & AV_CPU_FLAG_SSE) {
-        h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse;
-    }
+        if (mm_flags & AV_CPU_FLAG_SSE) {
+            h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse;
+        }
 
-    if (mm_flags & AV_CPU_FLAG_SSE2) {
-        h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_sse2;
-        h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
-        h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_sse2;
-        h->pred8x8l [VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_sse2;
-        h->pred8x8l [VERT_LEFT_PRED      ] = ff_pred8x8l_vertical_left_sse2;
-        h->pred8x8l [HOR_DOWN_PRED       ] = ff_pred8x8l_horizontal_down_sse2;
-        if (codec_id == CODEC_ID_VP8) {
-            h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
-            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
-        } else {
-            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_sse2;
-            if (codec_id == CODEC_ID_SVQ3) {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2;
-            } else if (codec_id == CODEC_ID_RV40) {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_sse2;
+        if (mm_flags & AV_CPU_FLAG_SSE2) {
+            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_sse2;
+            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_sse2;
+            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_sse2;
+            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_sse2;
+            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_sse2;
+            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_sse2;
+            if (codec_id == CODEC_ID_VP8) {
+                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_sse2;
+                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_sse2;
             } else {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_sse2;
+                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_plane_sse2;
+                if (codec_id == CODEC_ID_SVQ3) {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2;
+                } else if (codec_id == CODEC_ID_RV40) {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_sse2;
+                } else {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_sse2;
+                }
             }
         }
-    }
 
-    if (mm_flags & AV_CPU_FLAG_SSSE3) {
-        h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3;
-        h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_ssse3;
-        h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
-        h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3;
-        h->pred8x8l [DC_PRED     ] = ff_pred8x8l_dc_ssse3;
-        h->pred8x8l [HOR_PRED    ] = ff_pred8x8l_horizontal_ssse3;
-        h->pred8x8l [VERT_PRED   ] = ff_pred8x8l_vertical_ssse3;
-        h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_ssse3;
-        h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_ssse3;
-        h->pred8x8l [VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_ssse3;
-        h->pred8x8l [VERT_LEFT_PRED      ] = ff_pred8x8l_vertical_left_ssse3;
-        h->pred8x8l [HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_ssse3;
-        h->pred8x8l [HOR_DOWN_PRED       ] = ff_pred8x8l_horizontal_down_ssse3;
-        if (codec_id == CODEC_ID_VP8) {
-            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
-            h->pred4x4  [TM_VP8_PRED  ] = ff_pred4x4_tm_vp8_ssse3;
-        } else {
-            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3;
-            if (codec_id == CODEC_ID_SVQ3) {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3;
-            } else if (codec_id == CODEC_ID_RV40) {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_ssse3;
+        if (mm_flags & AV_CPU_FLAG_SSSE3) {
+            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_ssse3;
+            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_ssse3;
+            h->pred8x8  [HOR_PRED8x8          ] = ff_pred8x8_horizontal_ssse3;
+            h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_ssse3;
+            h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_ssse3;
+            h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_ssse3;
+            h->pred8x8l [VERT_PRED            ] = ff_pred8x8l_vertical_ssse3;
+            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_ssse3;
+            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_ssse3;
+            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_ssse3;
+            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_ssse3;
+            h->pred8x8l [HOR_UP_PRED          ] = ff_pred8x8l_horizontal_up_ssse3;
+            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_ssse3;
+            if (codec_id == CODEC_ID_VP8) {
+                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_ssse3;
+                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_ssse3;
             } else {
-                h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_ssse3;
+                h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3;
+                if (codec_id == CODEC_ID_SVQ3) {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3;
+                } else if (codec_id == CODEC_ID_RV40) {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_ssse3;
+                } else {
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_ssse3;
+                }
             }
         }
-    }
-#endif
     } else if (bit_depth == 10) {
-#if HAVE_YASM
         if (mm_flags & AV_CPU_FLAG_MMX2) {
             h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
             h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;
@@ -270,13 +270,13 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
             h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_ssse3;
         }
 #if HAVE_AVX
-        if (mm_flags&AV_CPU_FLAG_AVX) {
+        if (mm_flags & AV_CPU_FLAG_AVX) {
             h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
             h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
             h->pred4x4[VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_10_avx;
             h->pred4x4[HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_10_avx;
         }
 #endif /* HAVE_AVX */
-#endif /* HAVE_YASM */
     }
+#endif /* HAVE_YASM */
 }

From 8543f0f923eb1d476b14444e1cc8034f08ebcdda Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 8 Jun 2011 00:41:12 +0200
Subject: [PATCH 671/830] ffplay: Fix non-compiling debug printf and replace it
 by av_dlog.

---
 ffplay.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/ffplay.c b/ffplay.c
index 4ccad0618d..5700883828 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -1062,10 +1062,9 @@ static double compute_target_time(double frame_current_pts, VideoState *is)
         }
     }
     is->frame_timer += delay;
-#if defined(DEBUG_SYNC)
-    printf("video: delay=%0.3f actual_delay=%0.3f pts=%0.3f A-V=%f\n",
-            delay, actual_delay, frame_current_pts, -diff);
-#endif
+
+    av_dlog(NULL, "video: delay=%0.3f pts=%0.3f A-V=%f\n",
+            delay, frame_current_pts, -diff);
 
     return is->frame_timer;
 }

From ac4a8548110bc180cb67bea6eaf8b8e1081370cf Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 5 Jun 2011 15:53:34 +0200
Subject: [PATCH 672/830] Remove some non-compiling debug messages.

---
 libavcodec/motion_est.c | 2 --
 libavformat/mpeg.c      | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 4d4377be6a..5f10456d3b 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -1119,8 +1119,6 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
 //    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
     c->mc_mb_var_sum_temp += (vard+128)>>8;
 
-    av_dlog(s, "varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
-            varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
     if(mb_type){
         int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
         int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
diff --git a/libavformat/mpeg.c b/libavformat/mpeg.c
index 86cee0c4b8..5b4996e825 100644
--- a/libavformat/mpeg.c
+++ b/libavformat/mpeg.c
@@ -583,9 +583,6 @@ static int64_t mpegps_read_dts(AVFormatContext *s, int stream_index,
     int64_t pos, pts, dts;
 
     pos = *ppos;
-#ifdef DEBUG_SEEK
-    printf("read_dts: pos=0x%"PRIx64" next=%d -> ", pos, find_next);
-#endif
     if (avio_seek(s->pb, pos, SEEK_SET) < 0)
         return AV_NOPTS_VALUE;
 

From 21bf6d7aab8dd4d0cdda19e33fa4c756fcec467d Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 8 May 2011 20:46:41 +0200
Subject: [PATCH 673/830] mpegvideo: use av_get_picture_type_char() in
 ff_print_debug_info()

---
 libavcodec/mpegvideo.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 9397c657ad..8e9a79167d 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1319,15 +1319,8 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
         int x,y;
 
-        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
-        switch (pict->pict_type) {
-            case AV_PICTURE_TYPE_I: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
-            case AV_PICTURE_TYPE_P: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
-            case AV_PICTURE_TYPE_B: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
-            case AV_PICTURE_TYPE_S: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
-            case AV_PICTURE_TYPE_SI: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
-            case AV_PICTURE_TYPE_SP: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
-        }
+        av_log(s->avctx, AV_LOG_DEBUG, "New frame, type: %c\n",
+               av_get_picture_type_char(pict->pict_type));
         for(y=0; y<s->mb_height; y++){
             for(x=0; x<s->mb_width; x++){
                 if(s->avctx->debug&FF_DEBUG_SKIP){

From e77a3095bd7078cca9ee3a868fd0c4ed5bd72578 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Wed, 8 Jun 2011 01:24:42 +0200
Subject: [PATCH 674/830] ffplay: remove -vismv option

Use -vismv 1 instead. Simplify.
---
 doc/ffplay.texi | 2 --
 ffplay.c        | 9 ---------
 2 files changed, 11 deletions(-)

diff --git a/doc/ffplay.texi b/doc/ffplay.texi
index de942bb771..0d12fd8ca5 100644
--- a/doc/ffplay.texi
+++ b/doc/ffplay.texi
@@ -97,8 +97,6 @@ the stream and the audio/video synchronisation drift.
 Print specific debug info.
 @item -bug
 Work around bugs.
-@item -vismv
-Visualize motion vectors.
 @item -fast
 Non-spec-compliant optimizations.
 @item -genpts
diff --git a/ffplay.c b/ffplay.c
index 77b1e4e24c..cd2255adec 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -240,7 +240,6 @@ static int av_sync_type = AV_SYNC_AUDIO_MASTER;
 static int64_t start_time = AV_NOPTS_VALUE;
 static int64_t duration = AV_NOPTS_VALUE;
 static int debug = 0;
-static int debug_mv = 0;
 static int step = 0;
 static int thread_count = 1;
 static int workaround_bugs = 1;
@@ -2134,7 +2133,6 @@ static int stream_component_open(VideoState *is, int stream_index)
     if (!codec)
         return -1;
 
-    avctx->debug_mv = debug_mv;
     avctx->debug = debug;
     avctx->workaround_bugs = workaround_bugs;
     avctx->lowres = lowres;
@@ -2892,12 +2890,6 @@ static int opt_debug(const char *opt, const char *arg)
     return 0;
 }
 
-static int opt_vismv(const char *opt, const char *arg)
-{
-    debug_mv = parse_number_or_die(opt, arg, OPT_INT64, INT_MIN, INT_MAX);
-    return 0;
-}
-
 static int opt_thread_count(const char *opt, const char *arg)
 {
     thread_count= parse_number_or_die(opt, arg, OPT_INT64, 0, INT_MAX);
@@ -2949,7 +2941,6 @@ static const OptionDef options[] = {
     { "stats", OPT_BOOL | OPT_EXPERT, {(void*)&show_status}, "show status", "" },
     { "debug", HAS_ARG | OPT_EXPERT, {(void*)opt_debug}, "print specific debug info", "" },
     { "bug", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&workaround_bugs}, "workaround bugs", "" },
-    { "vismv", HAS_ARG | OPT_EXPERT, {(void*)opt_vismv}, "visualize motion vectors", "" },
     { "fast", OPT_BOOL | OPT_EXPERT, {(void*)&fast}, "non spec compliant optimizations", "" },
     { "genpts", OPT_BOOL | OPT_EXPERT, {(void*)&genpts}, "generate pts", "" },
     { "drp", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&decoder_reorder_pts}, "let decoder reorder pts 0=off 1=on -1=auto", ""},

From 8f2a2e605165efcbb16c3470d48e61e4a53a0f94 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Wed, 8 Jun 2011 01:28:11 +0200
Subject: [PATCH 675/830] ffplay: remove -debug option

The options -loglevel LEVEL -debug FLAGS can be used for achieving the
same objectives, with a finer level of control.
---
 doc/ffplay.texi |  2 --
 ffplay.c        | 10 ----------
 2 files changed, 12 deletions(-)

diff --git a/doc/ffplay.texi b/doc/ffplay.texi
index 0d12fd8ca5..f9f5e07cda 100644
--- a/doc/ffplay.texi
+++ b/doc/ffplay.texi
@@ -93,8 +93,6 @@ Set pixel format.
 @item -stats
 Show the stream duration, the codec parameters, the current position in
 the stream and the audio/video synchronisation drift.
-@item -debug
-Print specific debug info.
 @item -bug
 Work around bugs.
 @item -fast
diff --git a/ffplay.c b/ffplay.c
index cd2255adec..1fd9aba955 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -239,7 +239,6 @@ static int show_status = 1;
 static int av_sync_type = AV_SYNC_AUDIO_MASTER;
 static int64_t start_time = AV_NOPTS_VALUE;
 static int64_t duration = AV_NOPTS_VALUE;
-static int debug = 0;
 static int step = 0;
 static int thread_count = 1;
 static int workaround_bugs = 1;
@@ -2133,7 +2132,6 @@ static int stream_component_open(VideoState *is, int stream_index)
     if (!codec)
         return -1;
 
-    avctx->debug = debug;
     avctx->workaround_bugs = workaround_bugs;
     avctx->lowres = lowres;
     if(lowres) avctx->flags |= CODEC_FLAG_EMU_EDGE;
@@ -2883,13 +2881,6 @@ static int opt_duration(const char *opt, const char *arg)
     return 0;
 }
 
-static int opt_debug(const char *opt, const char *arg)
-{
-    av_log_set_level(99);
-    debug = parse_number_or_die(opt, arg, OPT_INT64, 0, INT_MAX);
-    return 0;
-}
-
 static int opt_thread_count(const char *opt, const char *arg)
 {
     thread_count= parse_number_or_die(opt, arg, OPT_INT64, 0, INT_MAX);
@@ -2939,7 +2930,6 @@ static const OptionDef options[] = {
     { "f", HAS_ARG, {(void*)opt_format}, "force format", "fmt" },
     { "pix_fmt", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_frame_pix_fmt}, "set pixel format", "format" },
     { "stats", OPT_BOOL | OPT_EXPERT, {(void*)&show_status}, "show status", "" },
-    { "debug", HAS_ARG | OPT_EXPERT, {(void*)opt_debug}, "print specific debug info", "" },
     { "bug", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&workaround_bugs}, "workaround bugs", "" },
     { "fast", OPT_BOOL | OPT_EXPERT, {(void*)&fast}, "non spec compliant optimizations", "" },
     { "genpts", OPT_BOOL | OPT_EXPERT, {(void*)&genpts}, "generate pts", "" },

From 1e9b3026908e556d1a2b6afb76d48df9272f8f95 Mon Sep 17 00:00:00 2001
From: Etienne Buira <etienne.buira.lists@free.fr>
Date: Tue, 7 Jun 2011 22:14:18 +0200
Subject: [PATCH 676/830] libx264: fix double free

---
 libavcodec/libx264.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index b0cca65a7c..1b6f55f801 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -170,14 +170,6 @@ static av_cold int X264_close(AVCodecContext *avctx)
     if (x4->enc)
         x264_encoder_close(x4->enc);
 
-    av_free(x4->preset);
-    av_free(x4->tune);
-    av_free(x4->profile);
-    av_free(x4->level);
-    av_free(x4->stats);
-    av_free(x4->weightp);
-    av_free(x4->x264opts);
-
     return 0;
 }
 

From 7d89f7cbf3ccd98f9a5f58db97effa9afd2d571a Mon Sep 17 00:00:00 2001
From: Etienne Buira <etienne.buira.lists@free.fr>
Date: Wed, 8 Jun 2011 02:20:53 +0200
Subject: [PATCH 677/830] crypto: fix potential double free

---
 libavformat/crypto.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/libavformat/crypto.c b/libavformat/crypto.c
index 789a4d1e76..03bfeddba4 100644
--- a/libavformat/crypto.c
+++ b/libavformat/crypto.c
@@ -97,8 +97,8 @@ static int crypto_open(URLContext *h, const char *uri, int flags)
 
     return 0;
 err:
-    av_free(c->key);
-    av_free(c->iv);
+    av_freep(&c->key); /* av_freep() takes the address of the pointer to free */
+    av_freep(&c->iv);
     return ret;
 }
 
@@ -157,8 +157,6 @@ static int crypto_close(URLContext *h)
     if (c->hd)
         ffurl_close(c->hd);
     av_freep(&c->aes);
-    av_freep(&c->key);
-    av_freep(&c->iv);
     return 0;
 }
 

From d9f80ea2a7325f9c84307568843512811a99baff Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 22 May 2011 12:45:00 +0200
Subject: [PATCH 678/830] Move metadata API from lavf to lavu.

Rename it to AVDictionary, since it will be used as such.  Tags
documentation and metadata conversion API is lavf-specific, so remains
there.
---
 doc/APIchanges         |   5 ++
 libavformat/avformat.h |  54 ++++++++++---------
 libavformat/metadata.c | 115 ++++++++++-------------------------------
 libavformat/metadata.h |   6 +--
 libavutil/Makefile     |   2 +
 libavutil/avutil.h     |   2 +-
 libavutil/dict.c       | 110 +++++++++++++++++++++++++++++++++++++++
 libavutil/dict.h       |  78 ++++++++++++++++++++++++++++
 libavutil/internal.h   |   6 +++
 9 files changed, 259 insertions(+), 119 deletions(-)
 create mode 100644 libavutil/dict.c
 create mode 100644 libavutil/dict.h

diff --git a/doc/APIchanges b/doc/APIchanges
index 0ce63fc382..b57868dfdf 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,11 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-xx - xxxxxxx - lavu 51.5.0 - AVMetadata
+  Move AVMetadata from lavf to lavu and rename it to
+  AVDictionary -- new installed header dict.h.
+  All av_metadata_* functions renamed to av_dict_*.
+
 2011-06-07 - a6703fa - lavu 51.4.0 - av_get_bytes_per_sample()
   Add av_get_bytes_per_sample() in libavutil/samplefmt.h.
   Deprecate av_get_bits_per_sample_fmt().
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 5b67211959..9369aa98ed 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -40,6 +40,7 @@ const char *avformat_license(void);
 #include <time.h>
 #include <stdio.h>  /* FILE */
 #include "libavcodec/avcodec.h"
+#include "libavutil/dict.h"
 
 #include "avio.h"
 #include "libavformat/version.h"
@@ -106,21 +107,24 @@ struct AVFormatContext;
  * variant_bitrate -- the total bitrate of the bitrate variant that the current stream is part of
  */
 
-#define AV_METADATA_MATCH_CASE      1
-#define AV_METADATA_IGNORE_SUFFIX   2
-#define AV_METADATA_DONT_STRDUP_KEY 4
-#define AV_METADATA_DONT_STRDUP_VAL 8
-#define AV_METADATA_DONT_OVERWRITE 16   ///< Don't overwrite existing tags.
-
-typedef struct {
-    char *key;
-    char *value;
-}AVMetadataTag;
-
-typedef struct AVMetadata AVMetadata;
 #if FF_API_OLD_METADATA2
+/**
+ * @defgroup old_metadata Old metadata API
+ * The following functions are deprecated, use
+ * their equivalents from libavutil/dict.h instead.
+ * @{
+ */
+
+#define AV_METADATA_MATCH_CASE      AV_DICT_MATCH_CASE
+#define AV_METADATA_IGNORE_SUFFIX   AV_DICT_IGNORE_SUFFIX
+#define AV_METADATA_DONT_STRDUP_KEY AV_DICT_DONT_STRDUP_KEY
+#define AV_METADATA_DONT_STRDUP_VAL AV_DICT_DONT_STRDUP_VAL
+#define AV_METADATA_DONT_OVERWRITE  AV_DICT_DONT_OVERWRITE
+
+typedef attribute_deprecated AVDictionary AVMetadata;
+typedef attribute_deprecated AVDictionaryEntry  AVMetadataTag;
+
 typedef struct AVMetadataConv AVMetadataConv;
-#endif
 
 /**
  * Get a metadata element with matching key.
@@ -130,8 +134,8 @@ typedef struct AVMetadataConv AVMetadataConv;
  * @param flags Allows case as well as suffix-insensitive comparisons.
  * @return Found tag or NULL, changing key or value leads to undefined behavior.
  */
-AVMetadataTag *
-av_metadata_get(AVMetadata *m, const char *key, const AVMetadataTag *prev, int flags);
+attribute_deprecated AVDictionaryEntry *
+av_metadata_get(AVDictionary *m, const char *key, const AVDictionaryEntry *prev, int flags);
 
 /**
  * Set the given tag in *pm, overwriting an existing tag.
@@ -143,30 +147,32 @@ av_metadata_get(AVMetadata *m, const char *key, const AVMetadataTag *prev, int f
  *        Passing a NULL value will cause an existing tag to be deleted.
  * @return >= 0 on success otherwise an error code <0
  */
-int av_metadata_set2(AVMetadata **pm, const char *key, const char *value, int flags);
+attribute_deprecated int av_metadata_set2(AVDictionary **pm, const char *key, const char *value, int flags);
 
-#if FF_API_OLD_METADATA2
 /**
  * This function is provided for compatibility reason and currently does nothing.
  */
 attribute_deprecated void av_metadata_conv(struct AVFormatContext *ctx, const AVMetadataConv *d_conv,
                                                                         const AVMetadataConv *s_conv);
-#endif
 
 /**
- * Copy metadata from one AVMetadata struct into another.
- * @param dst pointer to a pointer to a AVMetadata struct. If *dst is NULL,
+ * Copy metadata from one AVDictionary struct into another.
+ * @param dst pointer to a pointer to an AVDictionary struct. If *dst is NULL,
  *            this function will allocate a struct for you and put it in *dst
- * @param src pointer to source AVMetadata struct
+ * @param src pointer to source AVDictionary struct
  * @param flags flags to use when setting metadata in *dst
  * @note metadata is read using the AV_METADATA_IGNORE_SUFFIX flag
  */
-void av_metadata_copy(AVMetadata **dst, AVMetadata *src, int flags);
+attribute_deprecated void av_metadata_copy(AVDictionary **dst, AVDictionary *src, int flags);
 
 /**
- * Free all the memory allocated for an AVMetadata struct.
+ * Free all the memory allocated for an AVDictionary struct.
  */
-void av_metadata_free(AVMetadata **m);
+attribute_deprecated void av_metadata_free(AVDictionary **m);
+/**
+ * @}
+ */
+#endif
 
 
 /* packet functions */
diff --git a/libavformat/metadata.c b/libavformat/metadata.c
index f0275462fe..d8957dfa95 100644
--- a/libavformat/metadata.c
+++ b/libavformat/metadata.c
@@ -21,107 +21,51 @@
 #include <strings.h>
 #include "avformat.h"
 #include "metadata.h"
-
-AVMetadataTag *
-av_metadata_get(AVMetadata *m, const char *key, const AVMetadataTag *prev, int flags)
-{
-    unsigned int i, j;
-
-    if(!m)
-        return NULL;
-
-    if(prev) i= prev - m->elems + 1;
-    else     i= 0;
-
-    for(; i<m->count; i++){
-        const char *s= m->elems[i].key;
-        if(flags & AV_METADATA_MATCH_CASE) for(j=0;         s[j]  ==         key[j]  && key[j]; j++);
-        else                               for(j=0; toupper(s[j]) == toupper(key[j]) && key[j]; j++);
-        if(key[j])
-            continue;
-        if(s[j] && !(flags & AV_METADATA_IGNORE_SUFFIX))
-            continue;
-        return &m->elems[i];
-    }
-    return NULL;
-}
-
-int av_metadata_set2(AVMetadata **pm, const char *key, const char *value, int flags)
-{
-    AVMetadata *m= *pm;
-    AVMetadataTag *tag= av_metadata_get(m, key, NULL, flags);
-
-    if(!m)
-        m=*pm= av_mallocz(sizeof(*m));
-
-    if(tag){
-        if (flags & AV_METADATA_DONT_OVERWRITE)
-            return 0;
-        av_free(tag->value);
-        av_free(tag->key);
-        *tag= m->elems[--m->count];
-    }else{
-        AVMetadataTag *tmp= av_realloc(m->elems, (m->count+1) * sizeof(*m->elems));
-        if(tmp){
-            m->elems= tmp;
-        }else
-            return AVERROR(ENOMEM);
-    }
-    if(value){
-        if(flags & AV_METADATA_DONT_STRDUP_KEY){
-            m->elems[m->count].key  = key;
-        }else
-        m->elems[m->count].key  = av_strdup(key  );
-        if(flags & AV_METADATA_DONT_STRDUP_VAL){
-            m->elems[m->count].value= value;
-        }else
-        m->elems[m->count].value= av_strdup(value);
-        m->count++;
-    }
-    if(!m->count) {
-        av_free(m->elems);
-        av_freep(pm);
-    }
-
-    return 0;
-}
+#include "libavutil/dict.h"
 
 #if FF_API_OLD_METADATA2
+AVDictionaryEntry *
+av_metadata_get(AVDictionary *m, const char *key, const AVDictionaryEntry *prev, int flags)
+{
+    return av_dict_get(m, key, prev, flags);
+}
+
+int av_metadata_set2(AVDictionary **pm, const char *key, const char *value, int flags)
+{
+    return av_dict_set(pm, key, value, flags);
+}
+
 void av_metadata_conv(AVFormatContext *ctx, const AVMetadataConv *d_conv,
                                             const AVMetadataConv *s_conv)
 {
     return;
 }
-#endif
 
-void av_metadata_free(AVMetadata **pm)
+void av_metadata_free(AVDictionary **pm)
 {
-    AVMetadata *m= *pm;
-
-    if(m){
-        while(m->count--){
-            av_free(m->elems[m->count].key);
-            av_free(m->elems[m->count].value);
-        }
-        av_free(m->elems);
-    }
-    av_freep(pm);
+    av_dict_free(pm);
 }
 
-void ff_metadata_conv(AVMetadata **pm, const AVMetadataConv *d_conv,
+void av_metadata_copy(AVDictionary **dst, AVDictionary *src, int flags)
+{
+    av_dict_copy(dst, src, flags);
+}
+#endif
+
+void ff_metadata_conv(AVDictionary **pm, const AVMetadataConv *d_conv,
                                        const AVMetadataConv *s_conv)
 {
     /* TODO: use binary search to look up the two conversion tables
        if the tables are getting big enough that it would matter speed wise */
     const AVMetadataConv *sc, *dc;
-    AVMetadataTag *mtag = NULL;
-    AVMetadata *dst = NULL;
+    AVDictionaryEntry *mtag = NULL;
+    AVDictionary *dst = NULL;
     const char *key;
 
     if (d_conv == s_conv)
         return;
 
-    while((mtag=av_metadata_get(*pm, "", mtag, AV_METADATA_IGNORE_SUFFIX))) {
+    while ((mtag = av_dict_get(*pm, "", mtag, AV_DICT_IGNORE_SUFFIX))) {
         key = mtag->key;
         if (s_conv)
             for (sc=s_conv; sc->native; sc++)
@@ -135,9 +79,9 @@ void ff_metadata_conv(AVMetadata **pm, const AVMetadataConv *d_conv,
                     key = dc->native;
                     break;
                 }
-        av_metadata_set2(&dst, key, mtag->value, 0);
+        av_dict_set(&dst, key, mtag->value, 0);
     }
-    av_metadata_free(pm);
+    av_dict_free(pm);
     *pm = dst;
 }
 
@@ -154,10 +98,3 @@ void ff_metadata_conv_ctx(AVFormatContext *ctx, const AVMetadataConv *d_conv,
         ff_metadata_conv(&ctx->programs[i]->metadata, d_conv, s_conv);
 }
 
-void av_metadata_copy(AVMetadata **dst, AVMetadata *src, int flags)
-{
-    AVMetadataTag *t = NULL;
-
-    while ((t = av_metadata_get(src, "", t, AV_METADATA_IGNORE_SUFFIX)))
-        av_metadata_set2(dst, t->key, t->value, flags);
-}
diff --git a/libavformat/metadata.h b/libavformat/metadata.h
index 09066b8c8d..49be8b47e1 100644
--- a/libavformat/metadata.h
+++ b/libavformat/metadata.h
@@ -29,11 +29,7 @@
 
 
 #include "avformat.h"
-
-struct AVMetadata{
-    int count;
-    AVMetadataTag *elems;
-};
+#include "libavutil/dict.h"
 
 struct AVMetadataConv{
     const char *native;
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 01231bd52d..0d8f01dd8d 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -27,6 +27,7 @@ HEADERS = adler32.h                                                     \
           mathematics.h                                                 \
           md5.h                                                         \
           mem.h                                                         \
+          dict.h                                                        \
           opt.h                                                         \
           parseutils.h                                                  \
           pixdesc.h                                                     \
@@ -60,6 +61,7 @@ OBJS = adler32.o                                                        \
        mathematics.o                                                    \
        md5.o                                                            \
        mem.o                                                            \
+       dict.o                                                           \
        opt.o                                                            \
        parseutils.o                                                     \
        pixdesc.o                                                        \
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 8536790109..9c660f3a99 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,7 +40,7 @@
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR  4
+#define LIBAVUTIL_VERSION_MINOR  5
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/dict.c b/libavutil/dict.c
new file mode 100644
index 0000000000..56f1513d32
--- /dev/null
+++ b/libavutil/dict.c
@@ -0,0 +1,110 @@
+/*
+ * copyright (c) 2009 Michael Niedermayer
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <strings.h>
+#include "dict.h"
+#include "internal.h"
+#include "mem.h"
+
+AVDictionaryEntry *
+av_dict_get(AVDictionary *m, const char *key, const AVDictionaryEntry *prev, int flags)
+{
+    unsigned int i, j;
+
+    if (!m)
+        return NULL;
+    /* When iterating, resume right after prev; otherwise start at the first entry. */
+    i = prev ? prev - m->elems + 1 : 0;
+    for (; i < m->count; i++) {
+        const char *s = m->elems[i].key;
+        if (flags & AV_DICT_MATCH_CASE) /* j = length of the prefix shared by s and key */
+            for (j = 0; s[j] == key[j] && key[j]; j++);
+        else /* toupper() needs unsigned char values; plain char may be negative */
+            for (j = 0; toupper((unsigned char)s[j]) == toupper((unsigned char)key[j]) && key[j]; j++);
+        if (key[j])                                   /* key was not fully matched */
+            continue;
+        if (s[j] && !(flags & AV_DICT_IGNORE_SUFFIX)) /* entry key is longer than key */
+            continue;
+        return &m->elems[i];
+    }
+    return NULL;
+}
+
+int av_dict_set(AVDictionary **pm, const char *key, const char *value, int flags)
+{
+    AVDictionary      *m = *pm;
+    AVDictionaryEntry *tag = av_dict_get(m, key, NULL, flags);
+
+    /* Lazily create the dictionary; fail cleanly instead of dereferencing NULL on OOM. */
+    if (!m && !(m = *pm = av_mallocz(sizeof(*m))))
+        return AVERROR(ENOMEM);
+    if (tag) {
+        if (flags & AV_DICT_DONT_OVERWRITE)
+            return 0;
+        av_free(tag->value);
+        av_free(tag->key);
+        *tag = m->elems[--m->count]; /* fill the hole with the last entry */
+    } else {
+        AVDictionaryEntry *tmp = av_realloc(m->elems, (m->count+1) * sizeof(*m->elems));
+        if (!tmp)
+            return AVERROR(ENOMEM);
+        m->elems = tmp;
+    }
+    /* A NULL value only deletes: nothing is appended after the removal above. */
+    if (value) {
+        if (flags & AV_DICT_DONT_STRDUP_KEY)
+            m->elems[m->count].key   = (char *)key;   /* stored as-is, later av_free()d */
+        else
+            m->elems[m->count].key   = av_strdup(key);
+        if (flags & AV_DICT_DONT_STRDUP_VAL)
+            m->elems[m->count].value = (char *)value; /* stored as-is, later av_free()d */
+        else
+            m->elems[m->count].value = av_strdup(value);
+        m->count++;
+    }
+    if (!m->count) { /* drop the dictionary entirely once it holds no entries */
+        av_free(m->elems);
+        av_freep(pm);
+    }
+
+    return 0;
+}
+
+void av_dict_free(AVDictionary **pm)
+{
+    AVDictionary *m = *pm;
+    /* Free each entry's key and value, then the entry array; av_freep(pm) handles the struct. */
+    if (m) {
+        while(m->count--) {
+            av_free(m->elems[m->count].key);
+            av_free(m->elems[m->count].value);
+        }
+        av_free(m->elems);
+    }
+    av_freep(pm);
+}
+
+void av_dict_copy(AVDictionary **dst, AVDictionary *src, int flags)
+{
+    AVDictionaryEntry *t = NULL;
+    /* "" with AV_DICT_IGNORE_SUFFIX matches every key, so this visits all entries of src. */
+    while ((t = av_dict_get(src, "", t, AV_DICT_IGNORE_SUFFIX)))
+        av_dict_set(dst, t->key, t->value, flags);
+}
diff --git a/libavutil/dict.h b/libavutil/dict.h
new file mode 100644
index 0000000000..bfd7f2682c
--- /dev/null
+++ b/libavutil/dict.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/** @file
+ * Public dictionary API.
+ */
+
+#ifndef AVUTIL_DICT_H
+#define AVUTIL_DICT_H
+
+#define AV_DICT_MATCH_CASE      1   ///< Request case-sensitive key matching.
+#define AV_DICT_IGNORE_SUFFIX   2   ///< Also match entries whose key merely starts with the searched key.
+#define AV_DICT_DONT_STRDUP_KEY 4   ///< Store the passed key pointer itself instead of an av_strdup() copy.
+#define AV_DICT_DONT_STRDUP_VAL 8   ///< Store the passed value pointer itself instead of an av_strdup() copy.
+#define AV_DICT_DONT_OVERWRITE 16   ///< Don't overwrite existing entries.
+
+typedef struct {
+    char *key;   ///< entry key string; released with av_free() by the dictionary code
+    char *value; ///< entry value string; released with av_free() by the dictionary code
+} AVDictionaryEntry;
+
+typedef struct AVDictionary AVDictionary;
+
+/**
+ * Get a dictionary entry with matching key.
+ *
+ * @param prev Set to the previous matching element to find the next.
+ *             If set to NULL the first matching element is returned.
+ * @param flags Allows case as well as suffix-insensitive comparisons.
+ * @return Found entry or NULL, changing key or value leads to undefined behavior.
+ */
+AVDictionaryEntry *
+av_dict_get(AVDictionary *m, const char *key, const AVDictionaryEntry *prev, int flags);
+
+/**
+ * Set the given entry in *pm; an existing entry is overwritten unless AV_DICT_DONT_OVERWRITE is set.
+ *
+ * @param pm pointer to a pointer to a dictionary struct. If *pm is NULL
+ * a dictionary struct is allocated and put in *pm.
+ * @param key entry key to add to *pm (will be av_strduped depending on flags)
+ * @param value entry value to add to *pm (will be av_strduped depending on flags).
+ *        Passing a NULL value will cause an existing tag to be deleted.
+ * @return >= 0 on success otherwise an error code <0
+ */
+int av_dict_set(AVDictionary **pm, const char *key, const char *value, int flags);
+
+/**
+ * Copy entries from one AVDictionary struct into another.
+ * @param dst pointer to a pointer to a AVDictionary struct. If *dst is NULL,
+ *            this function will allocate a struct for you and put it in *dst
+ * @param src pointer to source AVDictionary struct
+ * @param flags flags to use when setting entries in *dst
+ * @note entries of src are read using the AV_DICT_IGNORE_SUFFIX flag
+ */
+void av_dict_copy(AVDictionary **dst, AVDictionary *src, int flags);
+
+/**
+ * Free all the memory allocated for an AVDictionary struct.
+ */
+void av_dict_free(AVDictionary **m);
+
+#endif // AVUTIL_DICT_H
diff --git a/libavutil/internal.h b/libavutil/internal.h
index ee11a0a9d2..5ed3fb81c3 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -37,6 +37,12 @@
 #include "config.h"
 #include "attributes.h"
 #include "timer.h"
+#include "dict.h"
+
+struct AVDictionary {
+    int count; ///< number of entries currently stored in elems
+    AVDictionaryEntry *elems; ///< entry array, grown by one element per added entry
+};
 
 #ifndef attribute_align_arg
 #if ARCH_X86_32 && AV_GCC_VERSION_AT_LEAST(4,2)

From d2d67e424fa10d883e13709095c80bd3b502ce03 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 22 May 2011 12:46:29 +0200
Subject: [PATCH 679/830] Remove all uses of now deprecated metadata functions.

---
 ffmpeg.c                     | 29 +++++++++++++++--------------
 ffprobe.c                    |  9 +++++----
 ffserver.c                   | 19 ++++++++++---------
 libavformat/aiffdec.c        |  3 ++-
 libavformat/apetag.c         |  3 ++-
 libavformat/applehttp.c      |  3 ++-
 libavformat/asfdec.c         |  5 +++--
 libavformat/asfenc.c         | 18 +++++++++---------
 libavformat/avformat.h       | 10 +++++-----
 libavformat/avidec.c         | 15 ++++++++-------
 libavformat/avienc.c         |  7 ++++---
 libavformat/cafdec.c         |  3 ++-
 libavformat/ffmetadec.c      |  7 ++++---
 libavformat/ffmetaenc.c      |  7 ++++---
 libavformat/flacenc.c        |  3 +--
 libavformat/flvdec.c         |  7 ++++---
 libavformat/flvenc.c         |  5 +++--
 libavformat/id3v1.c          |  7 ++++---
 libavformat/id3v2.c          | 28 ++++++++++++++--------------
 libavformat/iff.c            |  3 ++-
 libavformat/matroskadec.c    | 17 +++++++++--------
 libavformat/matroskaenc.c    | 31 ++++++++++++++++---------------
 libavformat/metadata.h       |  2 +-
 libavformat/mov.c            | 19 ++++++++++---------
 libavformat/movenc.c         | 25 +++++++++++++------------
 libavformat/mp3dec.c         |  3 ++-
 libavformat/mp3enc.c         | 17 +++++++++--------
 libavformat/mpc.c            |  3 ++-
 libavformat/mpegts.c         | 11 ++++++-----
 libavformat/mpegtsenc.c      | 11 ++++++-----
 libavformat/nsvdec.c         |  3 ++-
 libavformat/nutdec.c         |  5 +++--
 libavformat/nutenc.c         |  9 +++++----
 libavformat/oggdec.h         |  2 +-
 libavformat/oggenc.c         |  6 +++---
 libavformat/oggparsevorbis.c | 13 +++++++------
 libavformat/r3d.c            |  3 ++-
 libavformat/rmdec.c          |  3 ++-
 libavformat/rmenc.c          |  7 ++++---
 libavformat/rpl.c            |  7 ++++---
 libavformat/rtpdec_asf.c     |  2 +-
 libavformat/rtsp.c           |  5 +++--
 libavformat/sauce.c          |  5 +++--
 libavformat/sdp.c            |  3 ++-
 libavformat/soxdec.c         |  5 +++--
 libavformat/soxenc.c         |  5 +++--
 libavformat/tta.c            |  3 ++-
 libavformat/tty.c            |  3 ++-
 libavformat/utils.c          | 25 +++++++++++++------------
 libavformat/vorbiscomment.c  | 13 +++++++------
 libavformat/vorbiscomment.h  |  6 +++---
 libavformat/vqf.c            |  3 ++-
 libavformat/wc3movie.c       |  5 +++--
 libavformat/wtv.c            |  7 ++++---
 libavformat/wv.c             |  3 ++-
 55 files changed, 263 insertions(+), 218 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 1c4c4b15db..5099fc29ea 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -39,6 +39,7 @@
 #include "libavutil/colorspace.h"
 #include "libavutil/fifo.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/avstring.h"
 #include "libavutil/libm.h"
@@ -189,7 +190,7 @@ static int64_t start_time = 0;
 static int64_t recording_timestamp = 0;
 static int64_t input_ts_offset = 0;
 static int file_overwrite = 0;
-static AVMetadata *metadata;
+static AVDictionary *metadata;
 static int do_benchmark = 0;
 static int do_hex_dump = 0;
 static int do_pkt_dump = 0;
@@ -1894,7 +1895,7 @@ static int copy_chapters(int infile, int outfile)
         out_ch->end       = FFMIN(rt, in_ch->end   - ts_off);
 
         if (metadata_chapters_autocopy)
-            av_metadata_copy(&out_ch->metadata, in_ch->metadata, 0);
+            av_dict_copy(&out_ch->metadata, in_ch->metadata, 0);
 
         os->nb_chapters++;
         os->chapters = av_realloc(os->chapters, sizeof(AVChapter)*os->nb_chapters);
@@ -2083,8 +2084,8 @@ static int transcode(AVFormatContext **output_files,
         icodec = ist->st->codec;
 
         if (metadata_streams_autocopy)
-            av_metadata_copy(&ost->st->metadata, ist->st->metadata,
-                             AV_METADATA_DONT_OVERWRITE);
+            av_dict_copy(&ost->st->metadata, ist->st->metadata,
+                         AV_DICT_DONT_OVERWRITE);
 
         ost->st->disposition = ist->st->disposition;
         codec->bits_per_raw_sample= icodec->bits_per_raw_sample;
@@ -2352,7 +2353,7 @@ static int transcode(AVFormatContext **output_files,
     /* set meta data information from input file if required */
     for (i=0;i<nb_meta_data_maps;i++) {
         AVFormatContext *files[2];
-        AVMetadata      **meta[2];
+        AVDictionary    **meta[2];
         int j;
 
 #define METADATA_CHECK_INDEX(index, nb_elems, desc)\
@@ -2395,15 +2396,15 @@ static int transcode(AVFormatContext **output_files,
             }
         }
 
-        av_metadata_copy(meta[0], *meta[1], AV_METADATA_DONT_OVERWRITE);
+        av_dict_copy(meta[0], *meta[1], AV_DICT_DONT_OVERWRITE);
     }
 
     /* copy global metadata by default */
     if (metadata_global_autocopy) {
 
         for (i = 0; i < nb_output_files; i++)
-            av_metadata_copy(&output_files[i]->metadata, input_files[0].ctx->metadata,
-                             AV_METADATA_DONT_OVERWRITE);
+            av_dict_copy(&output_files[i]->metadata, input_files[0].ctx->metadata,
+                         AV_DICT_DONT_OVERWRITE);
     }
 
     /* copy chapters according to chapter maps */
@@ -2818,7 +2819,7 @@ static int opt_metadata(const char *opt, const char *arg)
     }
     *mid++= 0;
 
-    av_metadata_set2(&metadata, arg, mid, 0);
+    av_dict_set(&metadata, arg, mid, 0);
 
     return 0;
 }
@@ -3523,7 +3524,7 @@ static void new_video_stream(AVFormatContext *oc, int file_idx)
             parse_forced_key_frames(forced_key_frames, ost, video_enc);
     }
     if (video_language) {
-        av_metadata_set2(&st->metadata, "language", video_language, 0);
+        av_dict_set(&st->metadata, "language", video_language, 0);
         av_freep(&video_language);
     }
 
@@ -3603,7 +3604,7 @@ static void new_audio_stream(AVFormatContext *oc, int file_idx)
     }
     audio_enc->time_base= (AVRational){1, audio_sample_rate};
     if (audio_language) {
-        av_metadata_set2(&st->metadata, "language", audio_language, 0);
+        av_dict_set(&st->metadata, "language", audio_language, 0);
         av_freep(&audio_language);
     }
 
@@ -3699,7 +3700,7 @@ static void new_subtitle_stream(AVFormatContext *oc, int file_idx)
     }
 
     if (subtitle_language) {
-        av_metadata_set2(&st->metadata, "language", subtitle_language, 0);
+        av_dict_set(&st->metadata, "language", subtitle_language, 0);
         av_freep(&subtitle_language);
     }
 
@@ -3830,8 +3831,8 @@ static void opt_output_file(const char *filename)
 
         oc->timestamp = recording_timestamp;
 
-        av_metadata_copy(&oc->metadata, metadata, 0);
-        av_metadata_free(&metadata);
+        av_dict_copy(&oc->metadata, metadata, 0);
+        av_dict_free(&metadata);
     }
 
     output_files[nb_output_files++] = oc;
diff --git a/ffprobe.c b/ffprobe.c
index 508800e8d2..e00790f54b 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -25,6 +25,7 @@
 #include "libavcodec/avcodec.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/dict.h"
 #include "libavdevice/avdevice.h"
 #include "cmdutils.h"
 
@@ -160,7 +161,7 @@ static void show_stream(AVFormatContext *fmt_ctx, int stream_idx)
     AVCodecContext *dec_ctx;
     AVCodec *dec;
     char val_str[128];
-    AVMetadataTag *tag = NULL;
+    AVDictionaryEntry *tag = NULL;
     AVRational display_aspect_ratio;
 
     printf("[STREAM]\n");
@@ -226,7 +227,7 @@ static void show_stream(AVFormatContext *fmt_ctx, int stream_idx)
     if (stream->nb_frames)
         printf("nb_frames=%"PRId64"\n",    stream->nb_frames);
 
-    while ((tag = av_metadata_get(stream->metadata, "", tag, AV_METADATA_IGNORE_SUFFIX)))
+    while ((tag = av_dict_get(stream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX)))
         printf("TAG:%s=%s\n", tag->key, tag->value);
 
     printf("[/STREAM]\n");
@@ -234,7 +235,7 @@ static void show_stream(AVFormatContext *fmt_ctx, int stream_idx)
 
 static void show_format(AVFormatContext *fmt_ctx)
 {
-    AVMetadataTag *tag = NULL;
+    AVDictionaryEntry *tag = NULL;
     char val_str[128];
 
     printf("[FORMAT]\n");
@@ -252,7 +253,7 @@ static void show_format(AVFormatContext *fmt_ctx)
     printf("bit_rate=%s\n",         value_string(val_str, sizeof(val_str), fmt_ctx->bit_rate,
                                                  unit_bit_per_second_str));
 
-    while ((tag = av_metadata_get(fmt_ctx->metadata, "", tag, AV_METADATA_IGNORE_SUFFIX)))
+    while ((tag = av_dict_get(fmt_ctx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX)))
         printf("TAG:%s=%s\n", tag->key, tag->value);
 
     printf("[/FORMAT]\n");
diff --git a/ffserver.c b/ffserver.c
index 30edb05969..0f57979c9c 100644
--- a/ffserver.c
+++ b/ffserver.c
@@ -36,6 +36,7 @@
 #include "libavformat/avio_internal.h"
 #include "libavutil/avstring.h"
 #include "libavutil/lfg.h"
+#include "libavutil/dict.h"
 #include "libavutil/random_seed.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/opt.h"
@@ -854,7 +855,7 @@ static void close_connection(HTTPContext *c)
         ctx = c->rtp_ctx[i];
         if (ctx) {
             av_write_trailer(ctx);
-            av_metadata_free(&ctx->metadata);
+            av_dict_free(&ctx->metadata);
             av_free(ctx->streams[0]);
             av_free(ctx);
         }
@@ -2224,10 +2225,10 @@ static int http_prepare_data(HTTPContext *c)
     switch(c->state) {
     case HTTPSTATE_SEND_DATA_HEADER:
         memset(&c->fmt_ctx, 0, sizeof(c->fmt_ctx));
-        av_metadata_set2(&c->fmt_ctx.metadata, "author"   , c->stream->author   , 0);
-        av_metadata_set2(&c->fmt_ctx.metadata, "comment"  , c->stream->comment  , 0);
-        av_metadata_set2(&c->fmt_ctx.metadata, "copyright", c->stream->copyright, 0);
-        av_metadata_set2(&c->fmt_ctx.metadata, "title"    , c->stream->title    , 0);
+        av_dict_set(&c->fmt_ctx.metadata, "author"   , c->stream->author   , 0);
+        av_dict_set(&c->fmt_ctx.metadata, "comment"  , c->stream->comment  , 0);
+        av_dict_set(&c->fmt_ctx.metadata, "copyright", c->stream->copyright, 0);
+        av_dict_set(&c->fmt_ctx.metadata, "title"    , c->stream->title    , 0);
 
         c->fmt_ctx.streams = av_mallocz(sizeof(AVStream *) * c->stream->nb_streams);
 
@@ -2272,7 +2273,7 @@ static int http_prepare_data(HTTPContext *c)
             http_log("Error writing output header\n");
             return -1;
         }
-        av_metadata_free(&c->fmt_ctx.metadata);
+        av_dict_free(&c->fmt_ctx.metadata);
 
         len = avio_close_dyn_buf(c->fmt_ctx.pb, &c->pb_buffer);
         c->buffer_ptr = c->pb_buffer;
@@ -2927,8 +2928,8 @@ static int prepare_sdp_description(FFStream *stream, uint8_t **pbuffer,
     if (avc == NULL) {
         return -1;
     }
-    av_metadata_set2(&avc->metadata, "title",
-                     stream->title[0] ? stream->title : "No Title", 0);
+    av_dict_set(&avc->metadata, "title",
+               stream->title[0] ? stream->title : "No Title", 0);
     avc->nb_streams = stream->nb_streams;
     if (stream->is_multicast) {
         snprintf(avc->filename, 1024, "rtp://%s:%d?multicast=1?ttl=%d",
@@ -2954,7 +2955,7 @@ static int prepare_sdp_description(FFStream *stream, uint8_t **pbuffer,
 
  sdp_done:
     av_free(avc->streams);
-    av_metadata_free(&avc->metadata);
+    av_dict_free(&avc->metadata);
     av_free(avc);
     av_free(avs);
 
diff --git a/libavformat/aiffdec.c b/libavformat/aiffdec.c
index 8678f9bb9b..0e815421a7 100644
--- a/libavformat/aiffdec.c
+++ b/libavformat/aiffdec.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/intfloat_readwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "pcm.h"
 #include "aiff.h"
@@ -79,7 +80,7 @@ static void get_meta(AVFormatContext *s, const char *key, int size)
         return;
 
     str[res] = 0;
-    av_metadata_set2(&s->metadata, key, str, AV_METADATA_DONT_STRDUP_VAL);
+    av_dict_set(&s->metadata, key, str, AV_DICT_DONT_STRDUP_VAL);
 }
 
 /* Returns the number of sound data frames or negative on error */
diff --git a/libavformat/apetag.c b/libavformat/apetag.c
index 2eb1673cc4..257ed48970 100644
--- a/libavformat/apetag.c
+++ b/libavformat/apetag.c
@@ -21,6 +21,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "apetag.h"
 
@@ -57,7 +58,7 @@ static int ape_tag_read_field(AVFormatContext *s)
         return AVERROR(ENOMEM);
     avio_read(pb, value, size);
     value[size] = 0;
-    av_metadata_set2(&s->metadata, key, value, AV_METADATA_DONT_STRDUP_VAL);
+    av_dict_set(&s->metadata, key, value, AV_DICT_DONT_STRDUP_VAL);
     return 0;
 }
 
diff --git a/libavformat/applehttp.c b/libavformat/applehttp.c
index e3b1500044..86e8b5fbce 100644
--- a/libavformat/applehttp.c
+++ b/libavformat/applehttp.c
@@ -28,6 +28,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/opt.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "internal.h"
 #include <unistd.h>
@@ -505,7 +506,7 @@ static int applehttp_read_header(AVFormatContext *s, AVFormatParameters *ap)
             }
             avcodec_copy_context(st->codec, v->ctx->streams[j]->codec);
             if (v->bandwidth)
-                av_metadata_set2(&st->metadata, "variant_bitrate", bitrate_str,
+                av_dict_set(&st->metadata, "variant_bitrate", bitrate_str,
                                  0);
         }
         stream_offset += v->ctx->nb_streams;
diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index 789df1274d..16bba93c37 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -24,6 +24,7 @@
 #include "libavutil/bswap.h"
 #include "libavutil/common.h"
 #include "libavutil/avstring.h"
+#include "libavutil/dict.h"
 #include "libavcodec/mpegaudio.h"
 #include "avformat.h"
 #include "avio_internal.h"
@@ -179,7 +180,7 @@ static void get_tag(AVFormatContext *s, const char *key, int type, int len)
         goto finish;
     }
     if (*value)
-        av_metadata_set2(&s->metadata, key, value, 0);
+        av_dict_set(&s->metadata, key, value, 0);
 finish:
     av_freep(&value);
     avio_seek(s->pb, off + len, SEEK_SET);
@@ -689,7 +690,7 @@ static int asf_read_header(AVFormatContext *s, AVFormatParameters *ap)
                     const char primary_tag[3] = { rfc1766[0], rfc1766[1], '\0' }; // ignore country code if any
                     const char *iso6392 = av_convert_lang_to(primary_tag, AV_LANG_ISO639_2_BIBL);
                     if (iso6392)
-                        av_metadata_set2(&st->metadata, "language", iso6392, 0);
+                        av_dict_set(&st->metadata, "language", iso6392, 0);
                 }
             }
         }
diff --git a/libavformat/asfenc.c b/libavformat/asfenc.c
index f9cc609eef..f9b9b3c045 100644
--- a/libavformat/asfenc.c
+++ b/libavformat/asfenc.c
@@ -19,10 +19,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avformat.h"
-#include "metadata.h"
 #include "riff.h"
 #include "asf.h"
 #include "avio_internal.h"
+#include "libavutil/dict.h"
 
 #undef NDEBUG
 #include <assert.h>
@@ -296,7 +296,7 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data
 {
     ASFContext *asf = s->priv_data;
     AVIOContext *pb = s->pb;
-    AVMetadataTag *tags[5];
+    AVDictionaryEntry *tags[5];
     int header_size, n, extra_size, extra_size2, wav_extra_size, file_time;
     int has_title;
     int metadata_count;
@@ -307,11 +307,11 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data
 
     ff_metadata_conv(&s->metadata, ff_asf_metadata_conv, NULL);
 
-    tags[0] = av_metadata_get(s->metadata, "title"    , NULL, 0);
-    tags[1] = av_metadata_get(s->metadata, "author"   , NULL, 0);
-    tags[2] = av_metadata_get(s->metadata, "copyright", NULL, 0);
-    tags[3] = av_metadata_get(s->metadata, "comment"  , NULL, 0);
-    tags[4] = av_metadata_get(s->metadata, "rating"   , NULL, 0);
+    tags[0] = av_dict_get(s->metadata, "title"    , NULL, 0);
+    tags[1] = av_dict_get(s->metadata, "author"   , NULL, 0);
+    tags[2] = av_dict_get(s->metadata, "copyright", NULL, 0);
+    tags[3] = av_dict_get(s->metadata, "comment"  , NULL, 0);
+    tags[4] = av_dict_get(s->metadata, "rating"   , NULL, 0);
 
     duration = asf->duration + PREROLL_TIME * 10000;
     has_title = tags[0] || tags[1] || tags[2] || tags[3] || tags[4];
@@ -381,10 +381,10 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data
         end_header(pb, hpos);
     }
     if (metadata_count) {
-        AVMetadataTag *tag = NULL;
+        AVDictionaryEntry *tag = NULL;
         hpos = put_header(pb, &ff_asf_extended_content_header);
         avio_wl16(pb, metadata_count);
-        while ((tag = av_metadata_get(s->metadata, "", tag, AV_METADATA_IGNORE_SUFFIX))) {
+        while ((tag = av_dict_get(s->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
             put_str16(pb, tag->key);
             avio_wl16(pb, 0);
             put_str16(pb, tag->value);
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 9369aa98ed..2b5b50e381 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -161,7 +161,7 @@ attribute_deprecated void av_metadata_conv(struct AVFormatContext *ctx, const AV
  *            this function will allocate a struct for you and put it in *dst
  * @param src pointer to source AVDictionary struct
  * @param flags flags to use when setting metadata in *dst
- * @note metadata is read using the AV_METADATA_IGNORE_SUFFIX flag
+ * @note metadata is read using the AV_DICT_IGNORE_SUFFIX flag
  */
 attribute_deprecated void av_metadata_copy(AVDictionary **dst, AVDictionary *src, int flags);
 
@@ -565,7 +565,7 @@ typedef struct AVStream {
      */
     AVRational sample_aspect_ratio;
 
-    AVMetadata *metadata;
+    AVDictionary *metadata;
 
     /* Intended mostly for av_read_frame() support. Not supposed to be used by */
     /* external applications; try to use something else if at all possible.    */
@@ -633,7 +633,7 @@ typedef struct AVProgram {
     enum AVDiscard discard;        ///< selects which program to discard and which to feed to the caller
     unsigned int   *stream_index;
     unsigned int   nb_stream_indexes;
-    AVMetadata *metadata;
+    AVDictionary *metadata;
 } AVProgram;
 
 #define AVFMTCTX_NOHEADER      0x0001 /**< signal that no header is present
@@ -643,7 +643,7 @@ typedef struct AVChapter {
     int id;                 ///< unique ID to identify the chapter
     AVRational time_base;   ///< time base in which the start/end timestamps are specified
     int64_t start, end;     ///< chapter start/end time in time_base units
-    AVMetadata *metadata;
+    AVDictionary *metadata;
 } AVChapter;
 
 /**
@@ -806,7 +806,7 @@ typedef struct AVFormatContext {
 
     struct AVPacketList *packet_buffer_end;
 
-    AVMetadata *metadata;
+    AVDictionary *metadata;
 
     /**
      * Remaining size available for raw_packet_buffer, in bytes.
diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index dd2d1d0da6..a915cc6588 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -25,6 +25,7 @@
 #include <strings.h>
 #include "libavutil/intreadwrite.h"
 #include "libavutil/bswap.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "avi.h"
 #include "dv.h"
@@ -261,8 +262,8 @@ static int avi_read_tag(AVFormatContext *s, AVStream *st, uint32_t tag, uint32_t
 
     AV_WL32(key, tag);
 
-    return av_metadata_set2(st ? &st->metadata : &s->metadata, key, value,
-                            AV_METADATA_DONT_STRDUP_VAL);
+    return av_dict_set(st ? &st->metadata : &s->metadata, key, value,
+                            AV_DICT_DONT_STRDUP_VAL);
 }
 
 static void avi_read_info(AVFormatContext *s, uint64_t end)
@@ -277,7 +278,7 @@ static void avi_read_info(AVFormatContext *s, uint64_t end)
 static const char months[12][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
                                     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
 
-static void avi_metadata_creation_time(AVMetadata **metadata, char *date)
+static void avi_metadata_creation_time(AVDictionary **metadata, char *date)
 {
     char month[4], time[9], buffer[64];
     int i, day, year;
@@ -288,11 +289,11 @@ static void avi_metadata_creation_time(AVMetadata **metadata, char *date)
             if (!strcasecmp(month, months[i])) {
                 snprintf(buffer, sizeof(buffer), "%.4d-%.2d-%.2d %s",
                          year, i+1, day, time);
-                av_metadata_set2(metadata, "creation_time", buffer, 0);
+                av_dict_set(metadata, "creation_time", buffer, 0);
             }
     } else if (date[4] == '/' && date[7] == '/') {
         date[4] = date[7] = '-';
-        av_metadata_set2(metadata, "creation_time", date, 0);
+        av_dict_set(metadata, "creation_time", date, 0);
     }
 }
 
@@ -320,7 +321,7 @@ static void avi_read_nikon(AVFormatContext *s, uint64_t end)
                     break;
                 }
                 if (name)
-                    av_metadata_set2(&s->metadata, name, buffer, 0);
+                    av_dict_set(&s->metadata, name, buffer, 0);
                 avio_skip(s->pb, size);
             }
             break;
@@ -770,7 +771,7 @@ static int read_gab2_sub(AVStream *st, AVPacket *pkt) {
         ret = avio_get_str16le(pb, desc_len, desc, sizeof(desc));
         avio_skip(pb, desc_len - ret);
         if (*desc)
-            av_metadata_set2(&st->metadata, "title", desc, 0);
+            av_dict_set(&st->metadata, "title", desc, 0);
 
         avio_rl16(pb);   /* flags? */
         avio_rl32(pb);   /* data size */
diff --git a/libavformat/avienc.c b/libavformat/avienc.c
index 1b6f64527e..343396cabe 100644
--- a/libavformat/avienc.c
+++ b/libavformat/avienc.c
@@ -23,6 +23,7 @@
 #include "avio_internal.h"
 #include "riff.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 
 /*
  * TODO:
@@ -157,7 +158,7 @@ static int avi_write_header(AVFormatContext *s)
     int bitrate, n, i, nb_frames, au_byterate, au_ssize, au_scale;
     AVCodecContext *stream, *video_enc;
     int64_t list1, list2, strh, strf;
-    AVMetadataTag *t = NULL;
+    AVDictionaryEntry *t = NULL;
 
     if (s->nb_streams > AVI_MAX_STREAM_COUNT) {
         av_log(s, AV_LOG_ERROR, "AVI does not support >%d streams\n",
@@ -297,7 +298,7 @@ static int avi_write_header(AVFormatContext *s)
             return -1;
         }
         ff_end_tag(pb, strf);
-        if ((t = av_metadata_get(s->streams[i]->metadata, "title", NULL, 0))) {
+        if ((t = av_dict_get(s->streams[i]->metadata, "title", NULL, 0))) {
             avi_write_info_tag(s->pb, "strn", t->value);
             t = NULL;
         }
@@ -379,7 +380,7 @@ static int avi_write_header(AVFormatContext *s)
     ffio_wfourcc(pb, "INFO");
     ff_metadata_conv(&s->metadata, ff_avi_metadata_conv, NULL);
     for (i = 0; *ff_avi_tags[i]; i++) {
-        if ((t = av_metadata_get(s->metadata, ff_avi_tags[i], NULL, AV_METADATA_MATCH_CASE)))
+        if ((t = av_dict_get(s->metadata, ff_avi_tags[i], NULL, AV_DICT_MATCH_CASE)))
             avi_write_info_tag(s->pb, t->key, t->value);
     }
     ff_end_tag(pb, list2);
diff --git a/libavformat/cafdec.c b/libavformat/cafdec.c
index c720c34375..dff3b41281 100644
--- a/libavformat/cafdec.c
+++ b/libavformat/cafdec.c
@@ -29,6 +29,7 @@
 #include "riff.h"
 #include "isom.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "caf.h"
 
 typedef struct {
@@ -187,7 +188,7 @@ static void read_info_chunk(AVFormatContext *s, int64_t size)
         char value[1024];
         avio_get_str(pb, INT_MAX, key,   sizeof(key));
         avio_get_str(pb, INT_MAX, value, sizeof(value));
-        av_metadata_set2(&s->metadata, key, value, 0);
+        av_dict_set(&s->metadata, key, value, 0);
     }
 }
 
diff --git a/libavformat/ffmetadec.c b/libavformat/ffmetadec.c
index 6915b60552..e3d800d3c4 100644
--- a/libavformat/ffmetadec.c
+++ b/libavformat/ffmetadec.c
@@ -22,6 +22,7 @@
 #include "avformat.h"
 #include "ffmeta.h"
 #include "internal.h"
+#include "libavutil/dict.h"
 
 static int probe(AVProbeData *p)
 {
@@ -93,7 +94,7 @@ static uint8_t *unescape(uint8_t *buf, int size)
     return ret;
 }
 
-static int read_tag(uint8_t *line, AVMetadata **m)
+static int read_tag(uint8_t *line, AVDictionary **m)
 {
     uint8_t *key, *value, *p = line;
 
@@ -117,13 +118,13 @@ static int read_tag(uint8_t *line, AVMetadata **m)
         return AVERROR(ENOMEM);
     }
 
-    av_metadata_set2(m, key, value, AV_METADATA_DONT_STRDUP_KEY | AV_METADATA_DONT_STRDUP_VAL);
+    av_dict_set(m, key, value, AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL);
     return 0;
 }
 
 static int read_header(AVFormatContext *s, AVFormatParameters *ap)
 {
-    AVMetadata **m = &s->metadata;
+    AVDictionary **m = &s->metadata;
     uint8_t line[1024];
 
     while(!s->pb->eof_reached) {
diff --git a/libavformat/ffmetaenc.c b/libavformat/ffmetaenc.c
index 178da5950a..0aadb8a9cc 100644
--- a/libavformat/ffmetaenc.c
+++ b/libavformat/ffmetaenc.c
@@ -23,6 +23,7 @@
 
 #include "avformat.h"
 #include "ffmeta.h"
+#include "libavutil/dict.h"
 
 
 static void write_escape_str(AVIOContext *s, const uint8_t *str)
@@ -37,10 +38,10 @@ static void write_escape_str(AVIOContext *s, const uint8_t *str)
     }
 }
 
-static void write_tags(AVIOContext *s, AVMetadata *m)
+static void write_tags(AVIOContext *s, AVDictionary *m)
 {
-    AVMetadataTag *t = NULL;
-    while ((t = av_metadata_get(m, "", t, AV_METADATA_IGNORE_SUFFIX))) {
+    AVDictionaryEntry *t = NULL;
+    while ((t = av_dict_get(m, "", t, AV_DICT_IGNORE_SUFFIX))) {
         write_escape_str(s, t->key);
         avio_w8(s, '=');
         write_escape_str(s, t->value);
diff --git a/libavformat/flacenc.c b/libavformat/flacenc.c
index 38c3265d0c..fb28a6ed4c 100644
--- a/libavformat/flacenc.c
+++ b/libavformat/flacenc.c
@@ -22,7 +22,6 @@
 #include "libavcodec/flac.h"
 #include "avformat.h"
 #include "flacenc.h"
-#include "metadata.h"
 #include "vorbiscomment.h"
 #include "libavcodec/bytestream.h"
 
@@ -39,7 +38,7 @@ static int flac_write_block_padding(AVIOContext *pb, unsigned int n_padding_byte
     return 0;
 }
 
-static int flac_write_block_comment(AVIOContext *pb, AVMetadata **m,
+static int flac_write_block_comment(AVIOContext *pb, AVDictionary **m,
                                     int last_block, int bitexact)
 {
     const char *vendor = bitexact ? "ffmpeg" : LIBAVFORMAT_IDENT;
diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
index 6fdbf9b464..c6b386e28f 100644
--- a/libavformat/flvdec.c
+++ b/libavformat/flvdec.c
@@ -25,6 +25,7 @@
  */
 
 #include "libavutil/avstring.h"
+#include "libavutil/dict.h"
 #include "libavcodec/bytestream.h"
 #include "libavcodec/mpeg4audio.h"
 #include "avformat.h"
@@ -271,17 +272,17 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream, AVStream *vst
 
         if(amf_type == AMF_DATA_TYPE_BOOL) {
             av_strlcpy(str_val, num_val > 0 ? "true" : "false", sizeof(str_val));
-            av_metadata_set2(&s->metadata, key, str_val, 0);
+            av_dict_set(&s->metadata, key, str_val, 0);
         } else if(amf_type == AMF_DATA_TYPE_NUMBER) {
             snprintf(str_val, sizeof(str_val), "%.f", num_val);
-            av_metadata_set2(&s->metadata, key, str_val, 0);
+            av_dict_set(&s->metadata, key, str_val, 0);
             if(!strcmp(key, "duration")) s->duration = num_val * AV_TIME_BASE;
             else if(!strcmp(key, "videodatarate") && vcodec && 0 <= (int)(num_val * 1024.0))
                 vcodec->bit_rate = num_val * 1024.0;
             else if(!strcmp(key, "audiodatarate") && acodec && 0 <= (int)(num_val * 1024.0))
                 acodec->bit_rate = num_val * 1024.0;
         } else if (amf_type == AMF_DATA_TYPE_STRING)
-            av_metadata_set2(&s->metadata, key, str_val, 0);
+            av_dict_set(&s->metadata, key, str_val, 0);
     }
 
     return 0;
diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
index b20a3f80c5..487993cd9a 100644
--- a/libavformat/flvenc.c
+++ b/libavformat/flvenc.c
@@ -23,6 +23,7 @@
 #include "internal.h"
 #include "avc.h"
 #include "metadata.h"
+#include "libavutil/dict.h"
 
 #undef NDEBUG
 #include <assert.h>
@@ -177,7 +178,7 @@ static int flv_write_header(AVFormatContext *s)
     int i;
     double framerate = 0.0;
     int metadata_size_pos, data_size;
-    AVMetadataTag *tag = NULL;
+    AVDictionaryEntry *tag = NULL;
 
     for(i=0; i<s->nb_streams; i++){
         AVCodecContext *enc = s->streams[i]->codec;
@@ -274,7 +275,7 @@ static int flv_write_header(AVFormatContext *s)
         put_amf_double(pb, audio_enc->codec_tag);
     }
 
-    while ((tag = av_metadata_get(s->metadata, "", tag, AV_METADATA_IGNORE_SUFFIX))) {
+    while ((tag = av_dict_get(s->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
         put_amf_string(pb, tag->key);
         avio_w8(pb, AMF_DATA_TYPE_STRING);
         put_amf_string(pb, tag->value);
diff --git a/libavformat/id3v1.c b/libavformat/id3v1.c
index d296ebbe57..87930ff361 100644
--- a/libavformat/id3v1.c
+++ b/libavformat/id3v1.c
@@ -21,6 +21,7 @@
 
 #include "id3v1.h"
 #include "libavcodec/avcodec.h"
+#include "libavutil/dict.h"
 
 const char * const ff_id3v1_genre_str[ID3v1_GENRE_MAX + 1] = {
       [0] = "Blues",
@@ -191,7 +192,7 @@ static void get_string(AVFormatContext *s, const char *key,
     *q = '\0';
 
     if (*str)
-        av_metadata_set2(&s->metadata, key, str, 0);
+        av_dict_set(&s->metadata, key, str, 0);
 }
 
 /**
@@ -215,11 +216,11 @@ static int parse_tag(AVFormatContext *s, const uint8_t *buf)
     get_string(s, "comment", buf + 97, 30);
     if (buf[125] == 0 && buf[126] != 0) {
         snprintf(str, sizeof(str), "%d", buf[126]);
-        av_metadata_set2(&s->metadata, "track", str, 0);
+        av_dict_set(&s->metadata, "track", str, 0);
     }
     genre = buf[127];
     if (genre <= ID3v1_GENRE_MAX)
-        av_metadata_set2(&s->metadata, "genre", ff_id3v1_genre_str[genre], 0);
+        av_dict_set(&s->metadata, "genre", ff_id3v1_genre_str[genre], 0);
     return 0;
 }
 
diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index 06ae6f8b90..be6c03bbe5 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -23,7 +23,7 @@
 #include "id3v1.h"
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
-#include "metadata.h"
+#include "libavutil/dict.h"
 #include "avio_internal.h"
 
 int ff_id3v2_match(const uint8_t *buf, const char * magic)
@@ -133,7 +133,7 @@ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const cha
         val = dst;
 
     if (val)
-        av_metadata_set2(&s->metadata, key, val, AV_METADATA_DONT_OVERWRITE);
+        av_dict_set(&s->metadata, key, val, AV_DICT_DONT_OVERWRITE);
 }
 
 static int is_number(const char *str)
@@ -142,44 +142,44 @@ static int is_number(const char *str)
     return !*str;
 }
 
-static AVMetadataTag* get_date_tag(AVMetadata *m, const char *tag)
+static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag)
 {
-    AVMetadataTag *t;
-    if ((t = av_metadata_get(m, tag, NULL, AV_METADATA_MATCH_CASE)) &&
+    AVDictionaryEntry *t;
+    if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) &&
         strlen(t->value) == 4 && is_number(t->value))
         return t;
     return NULL;
 }
 
-static void merge_date(AVMetadata **m)
+static void merge_date(AVDictionary **m)
 {
-    AVMetadataTag *t;
+    AVDictionaryEntry *t;
     char date[17] = {0};      // YYYY-MM-DD hh:mm
 
     if (!(t = get_date_tag(*m, "TYER")) &&
         !(t = get_date_tag(*m, "TYE")))
         return;
     av_strlcpy(date, t->value, 5);
-    av_metadata_set2(m, "TYER", NULL, 0);
-    av_metadata_set2(m, "TYE",  NULL, 0);
+    av_dict_set(m, "TYER", NULL, 0);
+    av_dict_set(m, "TYE",  NULL, 0);
 
     if (!(t = get_date_tag(*m, "TDAT")) &&
         !(t = get_date_tag(*m, "TDA")))
         goto finish;
     snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value);
-    av_metadata_set2(m, "TDAT", NULL, 0);
-    av_metadata_set2(m, "TDA",  NULL, 0);
+    av_dict_set(m, "TDAT", NULL, 0);
+    av_dict_set(m, "TDA",  NULL, 0);
 
     if (!(t = get_date_tag(*m, "TIME")) &&
         !(t = get_date_tag(*m, "TIM")))
         goto finish;
     snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2);
-    av_metadata_set2(m, "TIME", NULL, 0);
-    av_metadata_set2(m, "TIM",  NULL, 0);
+    av_dict_set(m, "TIME", NULL, 0);
+    av_dict_set(m, "TIM",  NULL, 0);
 
 finish:
     if (date[0])
-        av_metadata_set2(m, "date", date, 0);
+        av_dict_set(m, "date", date, 0);
 }
 
 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags)
diff --git a/libavformat/iff.c b/libavformat/iff.c
index f9b5a773db..2b84986aff 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -30,6 +30,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 
 #define ID_8SVX       MKTAG('8','S','V','X')
@@ -106,7 +107,7 @@ static int get_metadata(AVFormatContext *s,
         return AVERROR(EIO);
     }
     buf[data_size] = 0;
-    av_metadata_set2(&s->metadata, tag, buf, AV_METADATA_DONT_STRDUP_VAL);
+    av_dict_set(&s->metadata, tag, buf, AV_DICT_DONT_STRDUP_VAL);
     return 0;
 }
 
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 9c11c71e1f..64db318869 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -42,6 +42,7 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/avstring.h"
 #include "libavutil/lzo.h"
+#include "libavutil/dict.h"
 #if CONFIG_ZLIB
 #include <zlib.h>
 #endif
@@ -1043,7 +1044,7 @@ static void matroska_merge_packets(AVPacket *out, AVPacket *in)
 }
 
 static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
-                                 AVMetadata **metadata, char *prefix)
+                                 AVDictionary **metadata, char *prefix)
 {
     MatroskaTag *tags = list->elem;
     char key[1024];
@@ -1059,14 +1060,14 @@ static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
         if (prefix)  snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name);
         else         av_strlcpy(key, tags[i].name, sizeof(key));
         if (tags[i].def || !lang) {
-        av_metadata_set2(metadata, key, tags[i].string, 0);
+        av_dict_set(metadata, key, tags[i].string, 0);
         if (tags[i].sub.nb_elem)
             matroska_convert_tag(s, &tags[i].sub, metadata, key);
         }
         if (lang) {
             av_strlcat(key, "-", sizeof(key));
             av_strlcat(key, lang, sizeof(key));
-            av_metadata_set2(metadata, key, tags[i].string, 0);
+            av_dict_set(metadata, key, tags[i].string, 0);
             if (tags[i].sub.nb_elem)
                 matroska_convert_tag(s, &tags[i].sub, metadata, key);
         }
@@ -1234,7 +1235,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
     if (matroska->duration)
         matroska->ctx->duration = matroska->duration * matroska->time_scale
                                   * 1000 / AV_TIME_BASE;
-    av_metadata_set2(&s->metadata, "title", matroska->title, 0);
+    av_dict_set(&s->metadata, "title", matroska->title, 0);
 
     tracks = matroska->tracks.elem;
     for (i=0; i < matroska->tracks.nb_elem; i++) {
@@ -1432,8 +1433,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
         st->codec->codec_id = codec_id;
         st->start_time = 0;
         if (strcmp(track->language, "und"))
-            av_metadata_set2(&st->metadata, "language", track->language, 0);
-        av_metadata_set2(&st->metadata, "title", track->name, 0);
+            av_dict_set(&st->metadata, "language", track->language, 0);
+        av_dict_set(&st->metadata, "title", track->name, 0);
 
         if (track->flag_default)
             st->disposition |= AV_DISPOSITION_DEFAULT;
@@ -1494,7 +1495,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
             AVStream *st = av_new_stream(s, 0);
             if (st == NULL)
                 break;
-            av_metadata_set2(&st->metadata, "filename",attachements[j].filename, 0);
+            av_dict_set(&st->metadata, "filename",attachements[j].filename, 0);
             st->codec->codec_id = CODEC_ID_NONE;
             st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT;
             st->codec->extradata  = av_malloc(attachements[j].bin.size);
@@ -1522,7 +1523,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
             ff_new_chapter(s, chapters[i].uid, (AVRational){1, 1000000000},
                            chapters[i].start, chapters[i].end,
                            chapters[i].title);
-            av_metadata_set2(&chapters[i].chapter->metadata,
+            av_dict_set(&chapters[i].chapter->metadata,
                              "title", chapters[i].title, 0);
             max_start = chapters[i].start;
         }
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index ba2ce28e17..fde1470f9a 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -30,6 +30,7 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/random_seed.h"
 #include "libavutil/lfg.h"
+#include "libavutil/dict.h"
 #include "libavcodec/xiph.h"
 #include "libavcodec/mpeg4audio.h"
 #include <strings.h>
@@ -523,7 +524,7 @@ static int mkv_write_tracks(AVFormatContext *s)
         int bit_depth = av_get_bits_per_sample(codec->codec_id);
         int sample_rate = codec->sample_rate;
         int output_sample_rate = 0;
-        AVMetadataTag *tag;
+        AVDictionaryEntry *tag;
 
         if (!bit_depth)
             bit_depth = av_get_bits_per_sample_fmt(codec->sample_fmt);
@@ -536,9 +537,9 @@ static int mkv_write_tracks(AVFormatContext *s)
         put_ebml_uint (pb, MATROSKA_ID_TRACKUID        , i + 1);
         put_ebml_uint (pb, MATROSKA_ID_TRACKFLAGLACING , 0);    // no lacing (yet)
 
-        if ((tag = av_metadata_get(st->metadata, "title", NULL, 0)))
+        if ((tag = av_dict_get(st->metadata, "title", NULL, 0)))
             put_ebml_string(pb, MATROSKA_ID_TRACKNAME, tag->value);
-        tag = av_metadata_get(st->metadata, "language", NULL, 0);
+        tag = av_dict_get(st->metadata, "language", NULL, 0);
         put_ebml_string(pb, MATROSKA_ID_TRACKLANGUAGE, tag ? tag->value:"und");
 
         if (st->disposition)
@@ -586,7 +587,7 @@ static int mkv_write_tracks(AVFormatContext *s)
                 // XXX: interlace flag?
                 put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELWIDTH , codec->width);
                 put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELHEIGHT, codec->height);
-                if ((tag = av_metadata_get(s->metadata, "stereo_mode", NULL, 0))) {
+                if ((tag = av_dict_get(s->metadata, "stereo_mode", NULL, 0))) {
                     uint8_t stereo_fmt = atoi(tag->value);
                     int valid_fmt = 0;
 
@@ -675,7 +676,7 @@ static int mkv_write_chapters(AVFormatContext *s)
     for (i = 0; i < s->nb_chapters; i++) {
         ebml_master chapteratom, chapterdisplay;
         AVChapter *c     = s->chapters[i];
-        AVMetadataTag *t = NULL;
+        AVDictionaryEntry *t = NULL;
 
         chapteratom = start_ebml_master(pb, MATROSKA_ID_CHAPTERATOM, 0);
         put_ebml_uint(pb, MATROSKA_ID_CHAPTERUID, c->id);
@@ -685,7 +686,7 @@ static int mkv_write_chapters(AVFormatContext *s)
                       av_rescale_q(c->end,   c->time_base, scale));
         put_ebml_uint(pb, MATROSKA_ID_CHAPTERFLAGHIDDEN , 0);
         put_ebml_uint(pb, MATROSKA_ID_CHAPTERFLAGENABLED, 1);
-        if ((t = av_metadata_get(c->metadata, "title", NULL, 0))) {
+        if ((t = av_dict_get(c->metadata, "title", NULL, 0))) {
             chapterdisplay = start_ebml_master(pb, MATROSKA_ID_CHAPTERDISPLAY, 0);
             put_ebml_string(pb, MATROSKA_ID_CHAPSTRING, t->value);
             put_ebml_string(pb, MATROSKA_ID_CHAPLANG  , "und");
@@ -698,7 +699,7 @@ static int mkv_write_chapters(AVFormatContext *s)
     return 0;
 }
 
-static void mkv_write_simpletag(AVIOContext *pb, AVMetadataTag *t)
+static void mkv_write_simpletag(AVIOContext *pb, AVDictionaryEntry *t)
 {
     uint8_t *key = av_strdup(t->key);
     uint8_t *p   = key;
@@ -728,12 +729,12 @@ static void mkv_write_simpletag(AVIOContext *pb, AVMetadataTag *t)
     av_freep(&key);
 }
 
-static int mkv_write_tag(AVFormatContext *s, AVMetadata *m, unsigned int elementid,
+static int mkv_write_tag(AVFormatContext *s, AVDictionary *m, unsigned int elementid,
                          unsigned int uid, ebml_master *tags)
 {
     MatroskaMuxContext *mkv = s->priv_data;
     ebml_master tag, targets;
-    AVMetadataTag *t = NULL;
+    AVDictionaryEntry *t = NULL;
     int ret;
 
     if (!tags->pos) {
@@ -749,7 +750,7 @@ static int mkv_write_tag(AVFormatContext *s, AVMetadata *m, unsigned int element
         put_ebml_uint(s->pb, elementid, uid);
     end_ebml_master(s->pb, targets);
 
-    while ((t = av_metadata_get(m, "", t, AV_METADATA_IGNORE_SUFFIX)))
+    while ((t = av_dict_get(m, "", t, AV_DICT_IGNORE_SUFFIX)))
         if (strcasecmp(t->key, "title"))
             mkv_write_simpletag(s->pb, t);
 
@@ -764,7 +765,7 @@ static int mkv_write_tags(AVFormatContext *s)
 
     ff_metadata_conv_ctx(s, ff_mkv_metadata_conv, NULL);
 
-    if (av_metadata_get(s->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX)) {
+    if (av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) {
         ret = mkv_write_tag(s, s->metadata, 0, 0, &tags);
         if (ret < 0) return ret;
     }
@@ -772,7 +773,7 @@ static int mkv_write_tags(AVFormatContext *s)
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st = s->streams[i];
 
-        if (!av_metadata_get(st->metadata, "", 0, AV_METADATA_IGNORE_SUFFIX))
+        if (!av_dict_get(st->metadata, "", 0, AV_DICT_IGNORE_SUFFIX))
             continue;
 
         ret = mkv_write_tag(s, st->metadata, MATROSKA_ID_TAGTARGETS_TRACKUID, i + 1, &tags);
@@ -782,7 +783,7 @@ static int mkv_write_tags(AVFormatContext *s)
     for (i = 0; i < s->nb_chapters; i++) {
         AVChapter *ch = s->chapters[i];
 
-        if (!av_metadata_get(ch->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX))
+        if (!av_dict_get(ch->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX))
             continue;
 
         ret = mkv_write_tag(s, ch->metadata, MATROSKA_ID_TAGTARGETS_CHAPTERUID, ch->id, &tags);
@@ -799,7 +800,7 @@ static int mkv_write_header(AVFormatContext *s)
     MatroskaMuxContext *mkv = s->priv_data;
     AVIOContext *pb = s->pb;
     ebml_master ebml_header, segment_info;
-    AVMetadataTag *tag;
+    AVDictionaryEntry *tag;
     int ret, i;
 
     if (!strcmp(s->oformat->name, "webm")) mkv->mode = MODE_WEBM;
@@ -836,7 +837,7 @@ static int mkv_write_header(AVFormatContext *s)
 
     segment_info = start_ebml_master(pb, MATROSKA_ID_INFO, 0);
     put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, 1000000);
-    if ((tag = av_metadata_get(s->metadata, "title", NULL, 0)))
+    if ((tag = av_dict_get(s->metadata, "title", NULL, 0)))
         put_ebml_string(pb, MATROSKA_ID_TITLE, tag->value);
     if (!(s->streams[0]->codec->flags & CODEC_FLAG_BITEXACT)) {
         uint32_t segment_uid[4];
diff --git a/libavformat/metadata.h b/libavformat/metadata.h
index 49be8b47e1..33e0d1ff6c 100644
--- a/libavformat/metadata.h
+++ b/libavformat/metadata.h
@@ -39,7 +39,7 @@ struct AVMetadataConv{
 typedef struct AVMetadataConv AVMetadataConv;
 #endif
 
-void ff_metadata_conv(AVMetadata **pm, const AVMetadataConv *d_conv,
+void ff_metadata_conv(AVDictionary **pm, const AVMetadataConv *d_conv,
                                        const AVMetadataConv *s_conv);
 void ff_metadata_conv_ctx(AVFormatContext *ctx, const AVMetadataConv *d_conv,
                                                 const AVMetadataConv *s_conv);
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 3aec86ceb1..ff32c92313 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -27,6 +27,7 @@
 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/avstring.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "avio_internal.h"
 #include "riff.h"
@@ -84,7 +85,7 @@ static int mov_metadata_trkn(MOVContext *c, AVIOContext *pb, unsigned len)
 
     avio_rb16(pb); // unknown
     snprintf(buf, sizeof(buf), "%d", avio_rb16(pb));
-    av_metadata_set2(&c->fc->metadata, "track", buf, 0);
+    av_dict_set(&c->fc->metadata, "track", buf, 0);
 
     avio_rb16(pb); // total tracks
 
@@ -203,10 +204,10 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             avio_read(pb, str, str_size);
             str[str_size] = 0;
         }
-        av_metadata_set2(&c->fc->metadata, key, str, 0);
+        av_dict_set(&c->fc->metadata, key, str, 0);
         if (*language && strcmp(language, "und")) {
             snprintf(key2, sizeof(key2), "%s-%s", key, language);
-            av_metadata_set2(&c->fc->metadata, key2, str, 0);
+            av_dict_set(&c->fc->metadata, key2, str, 0);
         }
     }
     av_dlog(c->fc, "lang \"%3s\" ", language);
@@ -552,10 +553,10 @@ static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     if (strcmp(type, "qt  "))
         c->isom = 1;
     av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand: %.4s\n",(char *)&type);
-    av_metadata_set2(&c->fc->metadata, "major_brand", type, 0);
+    av_dict_set(&c->fc->metadata, "major_brand", type, 0);
     minor_ver = avio_rb32(pb); /* minor version */
     snprintf(minor_ver_str, sizeof(minor_ver_str), "%d", minor_ver);
-    av_metadata_set2(&c->fc->metadata, "minor_version", minor_ver_str, 0);
+    av_dict_set(&c->fc->metadata, "minor_version", minor_ver_str, 0);
 
     comp_brand_size = atom.size - 8;
     if (comp_brand_size < 0)
@@ -565,7 +566,7 @@ static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return AVERROR(ENOMEM);
     avio_read(pb, comp_brands_str, comp_brand_size);
     comp_brands_str[comp_brand_size] = 0;
-    av_metadata_set2(&c->fc->metadata, "compatible_brands", comp_brands_str, 0);
+    av_dict_set(&c->fc->metadata, "compatible_brands", comp_brands_str, 0);
     av_freep(&comp_brands_str);
 
     return 0;
@@ -589,7 +590,7 @@ static int mov_read_moof(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return mov_read_default(c, pb, atom);
 }
 
-static void mov_metadata_creation_time(AVMetadata **metadata, time_t time)
+static void mov_metadata_creation_time(AVDictionary **metadata, time_t time)
 {
     char buffer[32];
     if (time) {
@@ -598,7 +599,7 @@ static void mov_metadata_creation_time(AVMetadata **metadata, time_t time)
         ptm = gmtime(&time);
         if (!ptm) return;
         strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", ptm);
-        av_metadata_set2(metadata, "creation_time", buffer, 0);
+        av_dict_set(metadata, "creation_time", buffer, 0);
     }
 }
 
@@ -635,7 +636,7 @@ static int mov_read_mdhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     lang = avio_rb16(pb); /* language */
     if (ff_mov_lang_to_iso639(lang, language))
-        av_metadata_set2(&st->metadata, "language", language, 0);
+        av_dict_set(&st->metadata, "language", language, 0);
     avio_rb16(pb); /* quality */
 
     return 0;
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 67d39f4aef..5f4eea49fc 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -33,6 +33,7 @@
 #include "internal.h"
 #include "libavutil/avstring.h"
 #include "libavutil/opt.h"
+#include "libavutil/dict.h"
 
 #undef NDEBUG
 #include <assert.h>
@@ -1525,15 +1526,15 @@ static int mov_write_string_metadata(AVFormatContext *s, AVIOContext *pb,
                                      int long_style)
 {
     int l, lang = 0, len, len2;
-    AVMetadataTag *t, *t2 = NULL;
+    AVDictionaryEntry *t, *t2 = NULL;
     char tag2[16];
 
-    if (!(t = av_metadata_get(s->metadata, tag, NULL, 0)))
+    if (!(t = av_dict_get(s->metadata, tag, NULL, 0)))
         return 0;
 
     len = strlen(t->key);
     snprintf(tag2, sizeof(tag2), "%s-", tag);
-    while ((t2 = av_metadata_get(s->metadata, tag2, t2, AV_METADATA_IGNORE_SUFFIX))) {
+    while ((t2 = av_dict_get(s->metadata, tag2, t2, AV_DICT_IGNORE_SUFFIX))) {
         len2 = strlen(t2->key);
         if (len2 == len+4 && !strcmp(t->value, t2->value)
             && (l=ff_mov_iso639_to_lang(&t2->key[len2-3], 1)) >= 0) {
@@ -1548,7 +1549,7 @@ static int mov_write_string_metadata(AVFormatContext *s, AVIOContext *pb,
 static int mov_write_trkn_tag(AVIOContext *pb, MOVMuxContext *mov,
                               AVFormatContext *s)
 {
-    AVMetadataTag *t = av_metadata_get(s->metadata, "track", NULL, 0);
+    AVDictionaryEntry *t = av_dict_get(s->metadata, "track", NULL, 0);
     int size = 0, track = t ? atoi(t->value) : 0;
     if (track) {
         avio_wb32(pb, 32); /* size */
@@ -1640,7 +1641,7 @@ static int mov_write_3gp_udta_tag(AVIOContext *pb, AVFormatContext *s,
                                   const char *tag, const char *str)
 {
     int64_t pos = avio_tell(pb);
-    AVMetadataTag *t = av_metadata_get(s->metadata, str, NULL, 0);
+    AVDictionaryEntry *t = av_dict_get(s->metadata, str, NULL, 0);
     if (!t || !utf8len(t->value))
         return 0;
     avio_wb32(pb, 0);   /* size */
@@ -1652,7 +1653,7 @@ static int mov_write_3gp_udta_tag(AVIOContext *pb, AVFormatContext *s,
         avio_wb16(pb, language_code("eng")); /* language */
         avio_write(pb, t->value, strlen(t->value)+1); /* UTF8 string value */
         if (!strcmp(tag, "albm") &&
-            (t = av_metadata_get(s->metadata, "track", NULL, 0)))
+            (t = av_dict_get(s->metadata, "track", NULL, 0)))
             avio_w8(pb, atoi(t->value));
     }
     return updateSize(pb, pos);
@@ -1671,10 +1672,10 @@ static int mov_write_chpl_tag(AVIOContext *pb, AVFormatContext *s)
 
     for (i = 0; i < nb_chapters; i++) {
         AVChapter *c = s->chapters[i];
-        AVMetadataTag *t;
+        AVDictionaryEntry *t;
         avio_wb64(pb, av_rescale_q(c->start, c->time_base, (AVRational){1,10000000}));
 
-        if ((t = av_metadata_get(c->metadata, "title", NULL, 0))) {
+        if ((t = av_dict_get(c->metadata, "title", NULL, 0))) {
             int len = FFMIN(strlen(t->value), 255);
             avio_w8(pb, len);
             avio_write(pb, t->value, len);
@@ -1752,7 +1753,7 @@ static void mov_write_psp_udta_tag(AVIOContext *pb,
 
 static int mov_write_uuidusmt_tag(AVIOContext *pb, AVFormatContext *s)
 {
-    AVMetadataTag *title = av_metadata_get(s->metadata, "title", NULL, 0);
+    AVDictionaryEntry *title = av_dict_get(s->metadata, "title", NULL, 0);
     int64_t pos, pos2;
 
     if (title) {
@@ -2102,13 +2103,13 @@ static void mov_create_chapter_track(AVFormatContext *s, int tracknum)
 
     for (i = 0; i < s->nb_chapters; i++) {
         AVChapter *c = s->chapters[i];
-        AVMetadataTag *t;
+        AVDictionaryEntry *t;
 
         int64_t end = av_rescale_q(c->end, c->time_base, (AVRational){1,MOV_TIMESCALE});
         pkt.pts = pkt.dts = av_rescale_q(c->start, c->time_base, (AVRational){1,MOV_TIMESCALE});
         pkt.duration = end - pkt.dts;
 
-        if ((t = av_metadata_get(c->metadata, "title", NULL, 0))) {
+        if ((t = av_dict_get(c->metadata, "title", NULL, 0))) {
             len = strlen(t->value);
             pkt.size = len+2;
             pkt.data = av_malloc(pkt.size);
@@ -2182,7 +2183,7 @@ static int mov_write_header(AVFormatContext *s)
     for(i=0; i<s->nb_streams; i++){
         AVStream *st= s->streams[i];
         MOVTrack *track= &mov->tracks[i];
-        AVMetadataTag *lang = av_metadata_get(st->metadata, "language", NULL,0);
+        AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
 
         track->enc = st->codec;
         track->language = ff_mov_iso639_to_lang(lang?lang->value:"und", mov->mode!=MODE_MOV);
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index dbecf3d2a7..a1db2323ac 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "id3v2.h"
 #include "id3v1.h"
@@ -149,7 +150,7 @@ static int mp3_read_header(AVFormatContext *s,
 
     off = avio_tell(s->pb);
 
-    if (!av_metadata_get(s->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX))
+    if (!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX))
         ff_id3v1_read(s);
 
     if (mp3_parse_vbr_tags(s, st, off) < 0)
diff --git a/libavformat/mp3enc.c b/libavformat/mp3enc.c
index 00ed6f8d4c..092d16ecc1 100644
--- a/libavformat/mp3enc.c
+++ b/libavformat/mp3enc.c
@@ -27,19 +27,20 @@
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/opt.h"
+#include "libavutil/dict.h"
 
 static int id3v1_set_string(AVFormatContext *s, const char *key,
                             uint8_t *buf, int buf_size)
 {
-    AVMetadataTag *tag;
-    if ((tag = av_metadata_get(s->metadata, key, NULL, 0)))
+    AVDictionaryEntry *tag;
+    if ((tag = av_dict_get(s->metadata, key, NULL, 0)))
         av_strlcpy(buf, tag->value, buf_size);
     return !!tag;
 }
 
 static int id3v1_create_tag(AVFormatContext *s, uint8_t *buf)
 {
-    AVMetadataTag *tag;
+    AVDictionaryEntry *tag;
     int i, count = 0;
 
     memset(buf, 0, ID3v1_TAG_SIZE); /* fail safe */
@@ -51,13 +52,13 @@ static int id3v1_create_tag(AVFormatContext *s, uint8_t *buf)
     count += id3v1_set_string(s, "TALB",    buf + 63, 30);       //album
     count += id3v1_set_string(s, "TDRL",    buf + 93,  4);       //date
     count += id3v1_set_string(s, "comment", buf + 97, 30);
-    if ((tag = av_metadata_get(s->metadata, "TRCK", NULL, 0))) { //track
+    if ((tag = av_dict_get(s->metadata, "TRCK", NULL, 0))) { //track
         buf[125] = 0;
         buf[126] = atoi(tag->value);
         count++;
     }
     buf[127] = 0xFF; /* default to unknown genre */
-    if ((tag = av_metadata_get(s->metadata, "TCON", NULL, 0))) { //genre
+    if ((tag = av_dict_get(s->metadata, "TCON", NULL, 0))) { //genre
         for(i = 0; i <= ID3v1_GENRE_MAX; i++) {
             if (!strcasecmp(tag->value, ff_id3v1_genre_str[i])) {
                 buf[127] = i;
@@ -173,7 +174,7 @@ static const AVClass mp3_muxer_class = {
     .version        = LIBAVUTIL_VERSION_INT,
 };
 
-static int id3v2_check_write_tag(AVFormatContext *s, AVMetadataTag *t, const char table[][4],
+static int id3v2_check_write_tag(AVFormatContext *s, AVDictionaryEntry *t, const char table[][4],
                                  enum ID3v2Encoding enc)
 {
     uint32_t tag;
@@ -195,7 +196,7 @@ static int id3v2_check_write_tag(AVFormatContext *s, AVMetadataTag *t, const cha
 static int mp3_write_header(struct AVFormatContext *s)
 {
     MP3Context  *mp3 = s->priv_data;
-    AVMetadataTag *t = NULL;
+    AVDictionaryEntry *t = NULL;
     int totlen = 0, enc = mp3->id3v2_version == 3 ? ID3v2_ENCODING_UTF16BOM :
                                                     ID3v2_ENCODING_UTF8;
     int64_t size_pos, cur_pos;
@@ -212,7 +213,7 @@ static int mp3_write_header(struct AVFormatContext *s)
     if (mp3->id3v2_version == 4)
         ff_metadata_conv(&s->metadata, ff_id3v2_4_metadata_conv, NULL);
 
-    while ((t = av_metadata_get(s->metadata, "", t, AV_METADATA_IGNORE_SUFFIX))) {
+    while ((t = av_dict_get(s->metadata, "", t, AV_DICT_IGNORE_SUFFIX))) {
         int ret;
 
         if ((ret = id3v2_check_write_tag(s, t, ff_id3v2_tags, enc)) > 0) {
diff --git a/libavformat/mpc.c b/libavformat/mpc.c
index 07c22990f6..0aec1e81d3 100644
--- a/libavformat/mpc.c
+++ b/libavformat/mpc.c
@@ -23,6 +23,7 @@
 #include "avformat.h"
 #include "apetag.h"
 #include "id3v1.h"
+#include "libavutil/dict.h"
 
 #define MPC_FRAMESIZE  1152
 #define DELAY_FRAMES   32
@@ -96,7 +97,7 @@ static int mpc_read_header(AVFormatContext *s, AVFormatParameters *ap)
     if (s->pb->seekable) {
         int64_t pos = avio_tell(s->pb);
         ff_ape_parse_tag(s);
-        if (!av_metadata_get(s->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX))
+        if (!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX))
             ff_id3v1_read(s);
         avio_seek(s->pb, pos, SEEK_SET);
     }
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index f675bf03fb..bf81b20b5c 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -26,6 +26,7 @@
 #include "libavutil/crc.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
+#include "libavutil/dict.h"
 #include "libavutil/opt.h"
 #include "libavcodec/bytestream.h"
 #include "avformat.h"
@@ -938,7 +939,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type
         language[1] = get8(pp, desc_end);
         language[2] = get8(pp, desc_end);
         language[3] = 0;
-        av_metadata_set2(&st->metadata, "language", language, 0);
+        av_dict_set(&st->metadata, "language", language, 0);
         break;
     case 0x59: /* subtitling descriptor */
         language[0] = get8(pp, desc_end);
@@ -967,7 +968,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type
             }
         }
         *pp += 4;
-        av_metadata_set2(&st->metadata, "language", language, 0);
+        av_dict_set(&st->metadata, "language", language, 0);
         break;
     case 0x0a: /* ISO 639 language descriptor */
         for (i = 0; i + 4 <= desc_len; i += 4) {
@@ -983,7 +984,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type
         }
         if (i) {
             language[i - 1] = 0;
-            av_metadata_set2(&st->metadata, "language", language, 0);
+            av_dict_set(&st->metadata, "language", language, 0);
         }
         break;
     case 0x05: /* registration descriptor */
@@ -1227,8 +1228,8 @@ static void sdt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
                 if (name) {
                     AVProgram *program = av_new_program(ts->stream, sid);
                     if(program) {
-                        av_metadata_set2(&program->metadata, "service_name", name, 0);
-                        av_metadata_set2(&program->metadata, "service_provider", provider_name, 0);
+                        av_dict_set(&program->metadata, "service_name", name, 0);
+                        av_dict_set(&program->metadata, "service_provider", provider_name, 0);
                     }
                 }
                 av_free(name);
diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index 393b779168..26d2cb229f 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/bswap.h"
 #include "libavutil/crc.h"
+#include "libavutil/dict.h"
 #include "libavutil/opt.h"
 #include "libavcodec/mpegvideo.h"
 #include "avformat.h"
@@ -244,7 +245,7 @@ static void mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
     for(i = 0; i < s->nb_streams; i++) {
         AVStream *st = s->streams[i];
         MpegTSWriteStream *ts_st = st->priv_data;
-        AVMetadataTag *lang = av_metadata_get(st->metadata, "language", NULL,0);
+        AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
         switch(st->codec->codec_id) {
         case CODEC_ID_MPEG1VIDEO:
         case CODEC_ID_MPEG2VIDEO:
@@ -443,7 +444,7 @@ static int mpegts_write_header(AVFormatContext *s)
     MpegTSWriteStream *ts_st;
     MpegTSService *service;
     AVStream *st, *pcr_st = NULL;
-    AVMetadataTag *title, *provider;
+    AVDictionaryEntry *title, *provider;
     int i, j;
     const char *service_name;
     const char *provider_name;
@@ -452,11 +453,11 @@ static int mpegts_write_header(AVFormatContext *s)
     ts->tsid = ts->transport_stream_id;
     ts->onid = ts->original_network_id;
     /* allocate a single DVB service */
-    title = av_metadata_get(s->metadata, "service_name", NULL, 0);
+    title = av_dict_get(s->metadata, "service_name", NULL, 0);
     if (!title)
-        title = av_metadata_get(s->metadata, "title", NULL, 0);
+        title = av_dict_get(s->metadata, "title", NULL, 0);
     service_name = title ? title->value : DEFAULT_SERVICE_NAME;
-    provider = av_metadata_get(s->metadata, "service_provider", NULL, 0);
+    provider = av_dict_get(s->metadata, "service_provider", NULL, 0);
     provider_name = provider ? provider->value : DEFAULT_PROVIDER_NAME;
     service = mpegts_add_service(ts, ts->service_id, provider_name, service_name);
     service->pmt.write_packet = section_write_packet;
diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index 037a6f166a..08338c4609 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -20,6 +20,7 @@
  */
 #include "avformat.h"
 #include "riff.h"
+#include "libavutil/dict.h"
 
 //#define DEBUG
 //#define DEBUG_DUMP_INDEX // XXX dumbdriving-271.nsv breaks with it commented!!
@@ -328,7 +329,7 @@ static int nsv_parse_NSVf_header(AVFormatContext *s, AVFormatParameters *ap)
                 break;
             *p++ = '\0';
             av_dlog(s, "NSV NSVf INFO: %s='%s'\n", token, value);
-            av_metadata_set2(&s->metadata, token, value, 0);
+            av_dict_set(&s->metadata, token, value, 0);
         }
         av_free(strings);
     }
diff --git a/libavformat/nutdec.c b/libavformat/nutdec.c
index d8278175a0..db1b999c90 100644
--- a/libavformat/nutdec.c
+++ b/libavformat/nutdec.c
@@ -23,6 +23,7 @@
 #include <strings.h>
 #include "libavutil/avstring.h"
 #include "libavutil/bswap.h"
+#include "libavutil/dict.h"
 #include "libavutil/tree.h"
 #include "avio_internal.h"
 #include "nut.h"
@@ -401,7 +402,7 @@ static int decode_info_header(NUTContext *nut){
     const char *type;
     AVChapter *chapter= NULL;
     AVStream *st= NULL;
-    AVMetadata **metadata = NULL;
+    AVDictionary **metadata = NULL;
 
     end= get_packetheader(nut, bc, 1, INFO_STARTCODE);
     end += avio_tell(bc);
@@ -459,7 +460,7 @@ static int decode_info_header(NUTContext *nut){
             }
             if(metadata && strcasecmp(name,"Uses")
                && strcasecmp(name,"Depends") && strcasecmp(name,"Replaces"))
-                av_metadata_set2(metadata, name, str_value, 0);
+                av_dict_set(metadata, name, str_value, 0);
         }
     }
 
diff --git a/libavformat/nutenc.c b/libavformat/nutenc.c
index 85340b158e..260a7607d8 100644
--- a/libavformat/nutenc.c
+++ b/libavformat/nutenc.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/tree.h"
+#include "libavutil/dict.h"
 #include "libavcodec/mpegaudiodata.h"
 #include "nut.h"
 #include "internal.h"
@@ -430,7 +431,7 @@ static int add_info(AVIOContext *bc, const char *type, const char *value){
 
 static int write_globalinfo(NUTContext *nut, AVIOContext *bc){
     AVFormatContext *s= nut->avf;
-    AVMetadataTag *t = NULL;
+    AVDictionaryEntry *t = NULL;
     AVIOContext *dyn_bc;
     uint8_t *dyn_buf=NULL;
     int count=0, dyn_size;
@@ -438,7 +439,7 @@ static int write_globalinfo(NUTContext *nut, AVIOContext *bc){
     if(ret < 0)
         return ret;
 
-    while ((t = av_metadata_get(s->metadata, "", t, AV_METADATA_IGNORE_SUFFIX)))
+    while ((t = av_dict_get(s->metadata, "", t, AV_DICT_IGNORE_SUFFIX)))
         count += add_info(dyn_bc, t->key, t->value);
 
     ff_put_v(bc, 0); //stream_if_plus1
@@ -489,7 +490,7 @@ static int write_chapter(NUTContext *nut, AVIOContext *bc, int id)
 {
     AVIOContext *dyn_bc;
     uint8_t *dyn_buf = NULL;
-    AVMetadataTag *t = NULL;
+    AVDictionaryEntry *t = NULL;
     AVChapter *ch    = nut->avf->chapters[id];
     int ret, dyn_size, count = 0;
 
@@ -502,7 +503,7 @@ static int write_chapter(NUTContext *nut, AVIOContext *bc, int id)
     put_tt(nut, nut->chapter[id].time_base, bc, ch->start); // chapter_start
     ff_put_v(bc, ch->end - ch->start);                      // chapter_len
 
-    while ((t = av_metadata_get(ch->metadata, "", t, AV_METADATA_IGNORE_SUFFIX)))
+    while ((t = av_dict_get(ch->metadata, "", t, AV_DICT_IGNORE_SUFFIX)))
         count += add_info(dyn_bc, t->key, t->value);
 
     ff_put_v(bc, count);
diff --git a/libavformat/oggdec.h b/libavformat/oggdec.h
index 7d66cd5638..e7d1022734 100644
--- a/libavformat/oggdec.h
+++ b/libavformat/oggdec.h
@@ -111,7 +111,7 @@ extern const struct ogg_codec ff_speex_codec;
 extern const struct ogg_codec ff_theora_codec;
 extern const struct ogg_codec ff_vorbis_codec;
 
-int ff_vorbis_comment(AVFormatContext *ms, AVMetadata **m, const uint8_t *buf, int size);
+int ff_vorbis_comment(AVFormatContext *ms, AVDictionary **m, const uint8_t *buf, int size);
 
 static inline int
 ogg_find_stream (struct ogg * ogg, int serial)
diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index 92c751c07a..bc4b3c10b2 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -221,7 +221,7 @@ static int ogg_buffer_data(AVFormatContext *s, AVStream *st,
 }
 
 static uint8_t *ogg_write_vorbiscomment(int offset, int bitexact,
-                                        int *header_len, AVMetadata **m, int framing_bit)
+                                        int *header_len, AVDictionary **m, int framing_bit)
 {
     const char *vendor = bitexact ? "ffmpeg" : LIBAVFORMAT_IDENT;
     int size;
@@ -247,7 +247,7 @@ static uint8_t *ogg_write_vorbiscomment(int offset, int bitexact,
 
 static int ogg_build_flac_headers(AVCodecContext *avctx,
                                   OGGStreamContext *oggstream, int bitexact,
-                                  AVMetadata **m)
+                                  AVDictionary **m)
 {
     enum FLACExtradataFormat format;
     uint8_t *streaminfo;
@@ -287,7 +287,7 @@ static int ogg_build_flac_headers(AVCodecContext *avctx,
 
 static int ogg_build_speex_headers(AVCodecContext *avctx,
                                    OGGStreamContext *oggstream, int bitexact,
-                                   AVMetadata **m)
+                                   AVDictionary **m)
 {
     uint8_t *p;
 
diff --git a/libavformat/oggparsevorbis.c b/libavformat/oggparsevorbis.c
index 830f0bca37..86951f3e2f 100644
--- a/libavformat/oggparsevorbis.c
+++ b/libavformat/oggparsevorbis.c
@@ -25,6 +25,7 @@
 #include <stdlib.h>
 #include "libavutil/avstring.h"
 #include "libavutil/bswap.h"
+#include "libavutil/dict.h"
 #include "libavcodec/get_bits.h"
 #include "libavcodec/bytestream.h"
 #include "avformat.h"
@@ -57,8 +58,8 @@ static int ogm_chapter(AVFormatContext *as, uint8_t *key, uint8_t *val)
         if (!chapter)
             return 0;
 
-        av_metadata_set2(&chapter->metadata, "title", val,
-                         AV_METADATA_DONT_STRDUP_VAL);
+        av_dict_set(&chapter->metadata, "title", val,
+                         AV_DICT_DONT_STRDUP_VAL);
     } else
         return 0;
 
@@ -67,7 +68,7 @@ static int ogm_chapter(AVFormatContext *as, uint8_t *key, uint8_t *val)
 }
 
 int
-ff_vorbis_comment(AVFormatContext * as, AVMetadata **m, const uint8_t *buf, int size)
+ff_vorbis_comment(AVFormatContext * as, AVDictionary **m, const uint8_t *buf, int size)
 {
     const uint8_t *p = buf;
     const uint8_t *end = buf + size;
@@ -127,9 +128,9 @@ ff_vorbis_comment(AVFormatContext * as, AVMetadata **m, const uint8_t *buf, int
             ct[vl] = 0;
 
             if (!ogm_chapter(as, tt, ct))
-                av_metadata_set2(m, tt, ct,
-                                   AV_METADATA_DONT_STRDUP_KEY |
-                                   AV_METADATA_DONT_STRDUP_VAL);
+                av_dict_set(m, tt, ct,
+                                   AV_DICT_DONT_STRDUP_KEY |
+                                   AV_DICT_DONT_STRDUP_VAL);
         }
     }
 
diff --git a/libavformat/r3d.c b/libavformat/r3d.c
index 148c6022bb..619c6a7b6c 100644
--- a/libavformat/r3d.c
+++ b/libavformat/r3d.c
@@ -22,6 +22,7 @@
 //#define DEBUG
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 
 typedef struct {
@@ -98,7 +99,7 @@ static int r3d_read_red1(AVFormatContext *s)
 
     avio_read(s->pb, filename, 257);
     filename[sizeof(filename)-1] = 0;
-    av_metadata_set2(&st->metadata, "filename", filename, 0);
+    av_dict_set(&st->metadata, "filename", filename, 0);
 
     av_dlog(s, "filename %s\n", filename);
     av_dlog(s, "resolution %dx%d\n", st->codec->width, st->codec->height);
diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
index d6a5251c05..fbc4d0cee6 100644
--- a/libavformat/rmdec.c
+++ b/libavformat/rmdec.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "riff.h"
 #include "rm.h"
@@ -104,7 +105,7 @@ static void rm_read_metadata(AVFormatContext *s, int wide)
     for (i=0; i<FF_ARRAY_ELEMS(ff_rm_metadata); i++) {
         int len = wide ? avio_rb16(s->pb) : avio_r8(s->pb);
         get_strl(s->pb, buf, sizeof(buf), len);
-        av_metadata_set2(&s->metadata, ff_rm_metadata[i], buf, 0);
+        av_dict_set(&s->metadata, ff_rm_metadata[i], buf, 0);
     }
 }
 
diff --git a/libavformat/rmenc.c b/libavformat/rmenc.c
index d64040c1a2..2476cb0590 100644
--- a/libavformat/rmenc.c
+++ b/libavformat/rmenc.c
@@ -21,6 +21,7 @@
 #include "avformat.h"
 #include "avio_internal.h"
 #include "rm.h"
+#include "libavutil/dict.h"
 
 typedef struct {
     int nb_packets;
@@ -71,7 +72,7 @@ static int rv10_write_header(AVFormatContext *ctx,
     const char *desc, *mimetype;
     int nb_packets, packet_total_size, packet_max_size, size, packet_avg_size, i;
     int bit_rate, v, duration, flags, data_pos;
-    AVMetadataTag *tag;
+    AVDictionaryEntry *tag;
 
     start_ptr = s->buf_ptr;
 
@@ -127,13 +128,13 @@ static int rv10_write_header(AVFormatContext *ctx,
     ffio_wfourcc(s,"CONT");
     size =  4 * 2 + 10;
     for(i=0; i<FF_ARRAY_ELEMS(ff_rm_metadata); i++) {
-        tag = av_metadata_get(ctx->metadata, ff_rm_metadata[i], NULL, 0);
+        tag = av_dict_get(ctx->metadata, ff_rm_metadata[i], NULL, 0);
         if(tag) size += strlen(tag->value);
     }
     avio_wb32(s,size);
     avio_wb16(s,0);
     for(i=0; i<FF_ARRAY_ELEMS(ff_rm_metadata); i++) {
-        tag = av_metadata_get(ctx->metadata, ff_rm_metadata[i], NULL, 0);
+        tag = av_dict_get(ctx->metadata, ff_rm_metadata[i], NULL, 0);
         put_str(s, tag ? tag->value : "");
     }
 
diff --git a/libavformat/rpl.c b/libavformat/rpl.c
index 935b81d7d8..f67d3cd7c4 100644
--- a/libavformat/rpl.c
+++ b/libavformat/rpl.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/avstring.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include <stdlib.h>
 
@@ -131,11 +132,11 @@ static int rpl_read_header(AVFormatContext *s, AVFormatParameters *ap)
     // for the text in a few cases; samples needed.)
     error |= read_line(pb, line, sizeof(line));      // ARMovie
     error |= read_line(pb, line, sizeof(line));      // movie name
-    av_metadata_set2(&s->metadata, "title"    , line, 0);
+    av_dict_set(&s->metadata, "title"    , line, 0);
     error |= read_line(pb, line, sizeof(line));      // date/copyright
-    av_metadata_set2(&s->metadata, "copyright", line, 0);
+    av_dict_set(&s->metadata, "copyright", line, 0);
     error |= read_line(pb, line, sizeof(line));      // author and other
-    av_metadata_set2(&s->metadata, "author"   , line, 0);
+    av_dict_set(&s->metadata, "author"   , line, 0);
 
     // video headers
     vst = av_new_stream(s, 0);
diff --git a/libavformat/rtpdec_asf.c b/libavformat/rtpdec_asf.c
index ecacc0eb4e..ef78426f1f 100644
--- a/libavformat/rtpdec_asf.c
+++ b/libavformat/rtpdec_asf.c
@@ -113,7 +113,7 @@ int ff_wms_parse_sdp_a_line(AVFormatContext *s, const char *p)
         ret = av_open_input_stream(&rt->asf_ctx, &pb, "", &ff_asf_demuxer, NULL);
         if (ret < 0)
             return ret;
-        av_metadata_copy(&s->metadata, rt->asf_ctx->metadata, 0);
+        av_dict_copy(&s->metadata, rt->asf_ctx->metadata, 0);
         rt->asf_pb_pos = avio_tell(&pb);
         av_free(buf);
         rt->asf_ctx->pb = NULL;
diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index c78b762a36..b2735f6165 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -24,6 +24,7 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/random_seed.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "avio_internal.h"
 
@@ -281,11 +282,11 @@ static void sdp_parse_line(AVFormatContext *s, SDPParseState *s1,
         }
         break;
     case 's':
-        av_metadata_set2(&s->metadata, "title", p, 0);
+        av_dict_set(&s->metadata, "title", p, 0);
         break;
     case 'i':
         if (s->nb_streams == 0) {
-            av_metadata_set2(&s->metadata, "comment", p, 0);
+            av_dict_set(&s->metadata, "comment", p, 0);
             break;
         }
         break;
diff --git a/libavformat/sauce.c b/libavformat/sauce.c
index f9ca17ac30..a125241335 100644
--- a/libavformat/sauce.c
+++ b/libavformat/sauce.c
@@ -25,6 +25,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "sauce.h"
 
@@ -44,7 +45,7 @@ int ff_sauce_read(AVFormatContext *avctx, uint64_t *fsize, int *got_width, int g
 #define GET_SAUCE_META(name,size) \
     if (avio_read(pb, buf, size) == size && buf[0]) { \
         buf[size] = 0; \
-        av_metadata_set2(&avctx->metadata, name, buf, 0); \
+        av_dict_set(&avctx->metadata, name, buf, 0); \
     }
 
     GET_SAUCE_META("title",     35)
@@ -95,7 +96,7 @@ int ff_sauce_read(AVFormatContext *avctx, uint64_t *fsize, int *got_width, int g
                 str[65*i + 64] = '\n';
             }
             str[65*i] = 0;
-            av_metadata_set2(&avctx->metadata, "comment", str, AV_METADATA_DONT_STRDUP_VAL);
+            av_dict_set(&avctx->metadata, "comment", str, AV_DICT_DONT_STRDUP_VAL);
         }
     }
 
diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index 005434c6c4..f72e2c567f 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -21,6 +21,7 @@
 #include <string.h>
 #include "libavutil/avstring.h"
 #include "libavutil/base64.h"
+#include "libavutil/dict.h"
 #include "libavutil/parseutils.h"
 #include "libavcodec/xiph.h"
 #include "avformat.h"
@@ -476,7 +477,7 @@ void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *des
 
 int av_sdp_create(AVFormatContext *ac[], int n_files, char *buf, int size)
 {
-    AVMetadataTag *title = av_metadata_get(ac[0]->metadata, "title", NULL, 0);
+    AVDictionaryEntry *title = av_dict_get(ac[0]->metadata, "title", NULL, 0);
     struct sdp_session_level s;
     int i, j, port, ttl, is_multicast;
     char dst[32], dst_type[5];
diff --git a/libavformat/soxdec.c b/libavformat/soxdec.c
index 74e53727e2..fb7b063f8c 100644
--- a/libavformat/soxdec.c
+++ b/libavformat/soxdec.c
@@ -30,6 +30,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "pcm.h"
 #include "sox.h"
@@ -101,8 +102,8 @@ static int sox_read_header(AVFormatContext *s,
         }
         comment[comment_size] = 0;
 
-        av_metadata_set2(&s->metadata, "comment", comment,
-                               AV_METADATA_DONT_STRDUP_VAL);
+        av_dict_set(&s->metadata, "comment", comment,
+                               AV_DICT_DONT_STRDUP_VAL);
     }
 
     avio_skip(pb, header_size - SOX_FIXED_HDR - comment_size);
diff --git a/libavformat/soxenc.c b/libavformat/soxenc.c
index cb71d73e9f..01d0cda2cf 100644
--- a/libavformat/soxenc.c
+++ b/libavformat/soxenc.c
@@ -30,6 +30,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "avio_internal.h"
 #include "sox.h"
@@ -43,10 +44,10 @@ static int sox_write_header(AVFormatContext *s)
     SoXContext *sox = s->priv_data;
     AVIOContext *pb = s->pb;
     AVCodecContext *enc = s->streams[0]->codec;
-    AVMetadataTag *comment;
+    AVDictionaryEntry *comment;
     size_t comment_len = 0, comment_size;
 
-    comment = av_metadata_get(s->metadata, "comment", NULL, 0);
+    comment = av_dict_get(s->metadata, "comment", NULL, 0);
     if (comment)
         comment_len = strlen(comment->value);
     comment_size = (comment_len + 7) & ~7;
diff --git a/libavformat/tta.c b/libavformat/tta.c
index 003620076d..49234a81e4 100644
--- a/libavformat/tta.c
+++ b/libavformat/tta.c
@@ -22,6 +22,7 @@
 #include "libavcodec/get_bits.h"
 #include "avformat.h"
 #include "id3v1.h"
+#include "libavutil/dict.h"
 
 typedef struct {
     int totalframes, currentframe;
@@ -43,7 +44,7 @@ static int tta_read_header(AVFormatContext *s, AVFormatParameters *ap)
     int i, channels, bps, samplerate, datalen, framelen;
     uint64_t framepos, start_offset;
 
-    if (!av_metadata_get(s->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX))
+    if (!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX))
         ff_id3v1_read(s);
 
     start_offset = avio_tell(s->pb);
diff --git a/libavformat/tty.c b/libavformat/tty.c
index ecd3f58c77..ee6b2f1334 100644
--- a/libavformat/tty.c
+++ b/libavformat/tty.c
@@ -27,6 +27,7 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/avstring.h"
 #include "libavutil/log.h"
+#include "libavutil/dict.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
 #include "avformat.h"
@@ -60,7 +61,7 @@ static int efi_read(AVFormatContext *avctx, uint64_t start_pos)
         return -1; \
     if (avio_read(pb, buf, size) == size) { \
         buf[len] = 0; \
-        av_metadata_set2(&avctx->metadata, name, buf, 0); \
+        av_dict_set(&avctx->metadata, name, buf, 0); \
     }
 
     GET_EFI_META("filename", 12)
diff --git a/libavformat/utils.c b/libavformat/utils.c
index bdc20f6d9b..8ab59e1d3b 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -26,6 +26,7 @@
 #include "internal.h"
 #include "libavcodec/internal.h"
 #include "libavutil/opt.h"
+#include "libavutil/dict.h"
 #include "metadata.h"
 #include "id3v2.h"
 #include "libavutil/avstring.h"
@@ -2560,7 +2561,7 @@ void avformat_free_context(AVFormatContext *s)
             av_parser_close(st->parser);
             av_free_packet(&st->cur_pkt);
         }
-        av_metadata_free(&st->metadata);
+        av_dict_free(&st->metadata);
         av_free(st->index_entries);
         av_free(st->codec->extradata);
         av_free(st->codec->subtitle_header);
@@ -2570,18 +2571,18 @@ void avformat_free_context(AVFormatContext *s)
         av_free(st);
     }
     for(i=s->nb_programs-1; i>=0; i--) {
-        av_metadata_free(&s->programs[i]->metadata);
+        av_dict_free(&s->programs[i]->metadata);
         av_freep(&s->programs[i]->stream_index);
         av_freep(&s->programs[i]);
     }
     av_freep(&s->programs);
     av_freep(&s->priv_data);
     while(s->nb_chapters--) {
-        av_metadata_free(&s->chapters[s->nb_chapters]->metadata);
+        av_dict_free(&s->chapters[s->nb_chapters]->metadata);
         av_free(s->chapters[s->nb_chapters]);
     }
     av_freep(&s->chapters);
-    av_metadata_free(&s->metadata);
+    av_dict_free(&s->metadata);
     av_freep(&s->streams);
     av_free(s);
 }
@@ -2685,7 +2686,7 @@ AVChapter *ff_new_chapter(AVFormatContext *s, int id, AVRational time_base, int6
             return NULL;
         dynarray_add(&s->chapters, &s->nb_chapters, chapter);
     }
-    av_metadata_set2(&chapter->metadata, "title", title, 0);
+    av_dict_set(&chapter->metadata, "title", title, 0);
     chapter->id    = id;
     chapter->time_base= time_base;
     chapter->start = start;
@@ -2824,7 +2825,7 @@ int av_write_header(AVFormatContext *s)
 
     /* set muxer identification string */
     if (s->nb_streams && !(s->streams[0]->codec->flags & CODEC_FLAG_BITEXACT)) {
-        av_metadata_set2(&s->metadata, "encoder", LIBAVFORMAT_IDENT, 0);
+        av_dict_set(&s->metadata, "encoder", LIBAVFORMAT_IDENT, 0);
     }
 
     if(s->oformat->write_header){
@@ -3138,13 +3139,13 @@ static void print_fps(double d, const char *postfix){
     else                  av_log(NULL, AV_LOG_INFO, ", %1.0fk %s", d/1000, postfix);
 }
 
-static void dump_metadata(void *ctx, AVMetadata *m, const char *indent)
+static void dump_metadata(void *ctx, AVDictionary *m, const char *indent)
 {
-    if(m && !(m->count == 1 && av_metadata_get(m, "language", NULL, 0))){
-        AVMetadataTag *tag=NULL;
+    if(m && !(m->count == 1 && av_dict_get(m, "language", NULL, 0))){
+        AVDictionaryEntry *tag=NULL;
 
         av_log(ctx, AV_LOG_INFO, "%sMetadata:\n", indent);
-        while((tag=av_metadata_get(m, "", tag, AV_METADATA_IGNORE_SUFFIX))) {
+        while((tag=av_dict_get(m, "", tag, AV_DICT_IGNORE_SUFFIX))) {
             if(strcmp("language", tag->key))
                 av_log(ctx, AV_LOG_INFO, "%s  %-16s: %s\n", indent, tag->key, tag->value);
         }
@@ -3158,7 +3159,7 @@ static void dump_stream_format(AVFormatContext *ic, int i, int index, int is_out
     int flags = (is_output ? ic->oformat->flags : ic->iformat->flags);
     AVStream *st = ic->streams[i];
     int g = av_gcd(st->time_base.num, st->time_base.den);
-    AVMetadataTag *lang = av_metadata_get(st->metadata, "language", NULL, 0);
+    AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL, 0);
     avcodec_string(buf, sizeof(buf), st->codec, is_output);
     av_log(NULL, AV_LOG_INFO, "    Stream #%d.%d", index, i);
     /* the pid is an important information, so we display it */
@@ -3282,7 +3283,7 @@ void av_dump_format(AVFormatContext *ic,
     if(ic->nb_programs) {
         int j, k, total = 0;
         for(j=0; j<ic->nb_programs; j++) {
-            AVMetadataTag *name = av_metadata_get(ic->programs[j]->metadata,
+            AVDictionaryEntry *name = av_dict_get(ic->programs[j]->metadata,
                                                   "name", NULL, 0);
             av_log(NULL, AV_LOG_INFO, "  Program %d %s\n", ic->programs[j]->id,
                    name ? name->value : "");
diff --git a/libavformat/vorbiscomment.c b/libavformat/vorbiscomment.c
index 22176c6fbe..56936d7666 100644
--- a/libavformat/vorbiscomment.c
+++ b/libavformat/vorbiscomment.c
@@ -23,6 +23,7 @@
 #include "metadata.h"
 #include "vorbiscomment.h"
 #include "libavcodec/bytestream.h"
+#include "libavutil/dict.h"
 
 /**
  * VorbisComment metadata conversion mapping.
@@ -36,15 +37,15 @@ const AVMetadataConv ff_vorbiscomment_metadata_conv[] = {
     { 0 }
 };
 
-int ff_vorbiscomment_length(AVMetadata *m, const char *vendor_string,
+int ff_vorbiscomment_length(AVDictionary *m, const char *vendor_string,
                             unsigned *count)
 {
     int len = 8;
     len += strlen(vendor_string);
     *count = 0;
     if (m) {
-        AVMetadataTag *tag = NULL;
-        while ((tag = av_metadata_get(m, "", tag, AV_METADATA_IGNORE_SUFFIX))) {
+        AVDictionaryEntry *tag = NULL;
+        while ((tag = av_dict_get(m, "", tag, AV_DICT_IGNORE_SUFFIX))) {
             len += 4 +strlen(tag->key) + 1 + strlen(tag->value);
             (*count)++;
         }
@@ -52,15 +53,15 @@ int ff_vorbiscomment_length(AVMetadata *m, const char *vendor_string,
     return len;
 }
 
-int ff_vorbiscomment_write(uint8_t **p, AVMetadata **m,
+int ff_vorbiscomment_write(uint8_t **p, AVDictionary **m,
                            const char *vendor_string, const unsigned count)
 {
     bytestream_put_le32(p, strlen(vendor_string));
     bytestream_put_buffer(p, vendor_string, strlen(vendor_string));
     if (*m) {
-        AVMetadataTag *tag = NULL;
+        AVDictionaryEntry *tag = NULL;
         bytestream_put_le32(p, count);
-        while ((tag = av_metadata_get(*m, "", tag, AV_METADATA_IGNORE_SUFFIX))) {
+        while ((tag = av_dict_get(*m, "", tag, AV_DICT_IGNORE_SUFFIX))) {
             unsigned int len1 = strlen(tag->key);
             unsigned int len2 = strlen(tag->value);
             bytestream_put_le32(p, len1+1+len2);
diff --git a/libavformat/vorbiscomment.h b/libavformat/vorbiscomment.h
index 98cc4f8abe..7b82dc1c95 100644
--- a/libavformat/vorbiscomment.h
+++ b/libavformat/vorbiscomment.h
@@ -35,13 +35,13 @@
  * @param count Pointer to store the number of tags in m because m->count is "not allowed"
  * @return The length in bytes.
  */
-int ff_vorbiscomment_length(AVMetadata *m, const char *vendor_string,
+int ff_vorbiscomment_length(AVDictionary *m, const char *vendor_string,
                             unsigned *count);
 
 /**
  * Writes a VorbisComment into a buffer. The buffer, p, must have enough
  * data to hold the whole VorbisComment. The minimum size required can be
- * obtained by passing the same AVMetadata and vendor_string to
+ * obtained by passing the same AVDictionary and vendor_string to
  * ff_vorbiscomment_length()
  *
  * @param p The buffer in which to write.
@@ -49,7 +49,7 @@ int ff_vorbiscomment_length(AVMetadata *m, const char *vendor_string,
  * @param vendor_string The vendor string to write.
  * @param count The number of tags in m because m->count is "not allowed"
  */
-int ff_vorbiscomment_write(uint8_t **p, AVMetadata **m,
+int ff_vorbiscomment_write(uint8_t **p, AVDictionary **m,
                            const char *vendor_string, const unsigned count);
 
 extern const AVMetadataConv ff_vorbiscomment_metadata_conv[];
diff --git a/libavformat/vqf.c b/libavformat/vqf.c
index 14fb8d76e3..5be7dfea21 100644
--- a/libavformat/vqf.c
+++ b/libavformat/vqf.c
@@ -21,6 +21,7 @@
 
 #include "avformat.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 
 typedef struct VqfContext {
     int frame_bit_len;
@@ -56,7 +57,7 @@ static void add_metadata(AVFormatContext *s, const char *tag,
         return;
     avio_read(s->pb, buf, len);
     buf[len] = 0;
-    av_metadata_set2(&s->metadata, tag, buf, AV_METADATA_DONT_STRDUP_VAL);
+    av_dict_set(&s->metadata, tag, buf, AV_DICT_DONT_STRDUP_VAL);
 }
 
 static int vqf_read_header(AVFormatContext *s, AVFormatParameters *ap)
diff --git a/libavformat/wc3movie.c b/libavformat/wc3movie.c
index 292ef66b54..e57a9bf844 100644
--- a/libavformat/wc3movie.c
+++ b/libavformat/wc3movie.c
@@ -28,6 +28,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 
 #define FORM_TAG MKTAG('F', 'O', 'R', 'M')
@@ -130,8 +131,8 @@ static int wc3_read_header(AVFormatContext *s,
             if ((ret = avio_read(pb, buffer, size)) != size)
                 return AVERROR(EIO);
             buffer[size] = 0;
-            av_metadata_set2(&s->metadata, "title", buffer,
-                                   AV_METADATA_DONT_STRDUP_VAL);
+            av_dict_set(&s->metadata, "title", buffer,
+                                   AV_DICT_DONT_STRDUP_VAL);
             break;
 
         case SIZE_TAG:
diff --git a/libavformat/wtv.c b/libavformat/wtv.c
index 0a18c8e517..0f9fdeff06 100644
--- a/libavformat/wtv.c
+++ b/libavformat/wtv.c
@@ -27,6 +27,7 @@
 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/intfloat_readwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "internal.h"
 #include "riff.h"
@@ -483,7 +484,7 @@ static void get_attachment(AVFormatContext *s, AVIOContext *pb, int length)
     st = av_new_stream(s, 0);
     if (!st)
         goto done;
-    av_metadata_set2(&st->metadata, "title", description, 0);
+    av_dict_set(&st->metadata, "title", description, 0);
     st->codec->codec_id   = CODEC_ID_MJPEG;
     st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT;
     st->codec->extradata  = av_mallocz(filesize);
@@ -543,7 +544,7 @@ static void get_tag(AVFormatContext *s, AVIOContext *pb, const char *key, int ty
         return;
     }
 
-    av_metadata_set2(&s->metadata, key, buf, 0);
+    av_dict_set(&s->metadata, key, buf, 0);
     av_freep(&buf);
 }
 
@@ -867,7 +868,7 @@ static int parse_chunks(AVFormatContext *s, int mode, int64_t seekts, int *len_p
                 avio_read(pb, language, 3);
                 if (language[0]) {
                     language[3] = 0;
-                    av_metadata_set2(&st->metadata, "language", language, 0);
+                    av_dict_set(&st->metadata, "language", language, 0);
                     if (!strcmp(language, "nar") || !strcmp(language, "NAR"))
                         st->disposition |= AV_DISPOSITION_VISUAL_IMPAIRED;
                 }
diff --git a/libavformat/wv.c b/libavformat/wv.c
index 7106735041..8f9d0fdb1b 100644
--- a/libavformat/wv.c
+++ b/libavformat/wv.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/audioconvert.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/dict.h"
 #include "avformat.h"
 #include "apetag.h"
 #include "id3v1.h"
@@ -226,7 +227,7 @@ static int wv_read_header(AVFormatContext *s,
     if(s->pb->seekable) {
         int64_t cur = avio_tell(s->pb);
         ff_ape_parse_tag(s);
-        if(!av_metadata_get(s->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX))
+        if(!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX))
             ff_id3v1_read(s);
         avio_seek(s->pb, cur, SEEK_SET);
     }

From a4855adc802622d0487c891743f8259c62958338 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 6 Jun 2011 14:10:22 +0200
Subject: [PATCH 680/830] dvbsubdec: Fix compilation of debug code.

---
 libavcodec/dvbsubdec.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavcodec/dvbsubdec.c b/libavcodec/dvbsubdec.c
index 66659ae637..3f39a960cb 100644
--- a/libavcodec/dvbsubdec.c
+++ b/libavcodec/dvbsubdec.c
@@ -35,6 +35,7 @@
 
 #ifdef DEBUG
 #undef fprintf
+#undef perror
 #if 0
 static void png_save(const char *filename, uint8_t *bitmap, int w, int h,
                      uint32_t *rgba_palette)
@@ -49,7 +50,7 @@ static void png_save(const char *filename, uint8_t *bitmap, int w, int h,
     f = fopen(fname, "w");
     if (!f) {
         perror(fname);
-        exit(1);
+        return;
     }
     fprintf(f, "P6\n"
             "%d %d\n"
@@ -71,7 +72,7 @@ static void png_save(const char *filename, uint8_t *bitmap, int w, int h,
     f = fopen(fname2, "w");
     if (!f) {
         perror(fname2);
-        exit(1);
+        return;
     }
     fprintf(f, "P5\n"
             "%d %d\n"
@@ -105,7 +106,7 @@ static void png_save2(const char *filename, uint32_t *bitmap, int w, int h)
     f = fopen(fname, "w");
     if (!f) {
         perror(fname);
-        exit(1);
+        return;
     }
     fprintf(f, "P6\n"
             "%d %d\n"
@@ -127,7 +128,7 @@ static void png_save2(const char *filename, uint32_t *bitmap, int w, int h)
     f = fopen(fname2, "w");
     if (!f) {
         perror(fname2);
-        exit(1);
+        return;
     }
     fprintf(f, "P5\n"
             "%d %d\n"

From b7847a3f2eb56591590ab065b65c335892af6e4c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 5 Jun 2011 16:10:17 +0200
Subject: [PATCH 681/830] mov: Remove leftover crufty debug statement with
 references to a local file.

---
 libavformat/mov.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index ff32c92313..245933da3a 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2143,9 +2143,6 @@ static int mov_read_cmov(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         goto free_and_return;
     atom.type = MKTAG('m','o','o','v');
     atom.size = moov_len;
-#ifdef DEBUG
-//    { int fd = open("/tmp/uncompheader.mov", O_WRONLY | O_CREAT); write(fd, moov_data, moov_len); close(fd); }
-#endif
     ret = mov_read_default(c, &ctx, atom);
 free_and_return:
     av_free(moov_data);

From 5bd6ec6d59737db63b12312f20ac0f3f0b89502e Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 8 Jun 2011 14:59:47 +0200
Subject: [PATCH 682/830] options: Add missing braces around struct
 initializer.

This fixes the warning:
libavformat/options.c:62:1: warning: missing braces around initializer [-Wmissing-braces]
---
 libavformat/options.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/options.c b/libavformat/options.c
index 6ffd1a7435..c11f19e687 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -59,7 +59,7 @@ static const AVOption options[]={
 {"fdebug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, 0, INT_MAX, E|D, "fdebug"},
 {"ts", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_FDEBUG_TS }, INT_MIN, INT_MAX, E|D, "fdebug"},
 {"max_delay", "maximum muxing or demuxing delay in microseconds", OFFSET(max_delay), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, E|D},
-{"fpsprobesize", "number of frames used to probe fps", OFFSET(fps_probe_size), FF_OPT_TYPE_INT, -1, -1, INT_MAX-1, D},
+{"fpsprobesize", "number of frames used to probe fps", OFFSET(fps_probe_size), FF_OPT_TYPE_INT, {.dbl = -1}, -1, INT_MAX-1, D},
 {NULL},
 };
 

From 9e4cb03a93593f8ddb8b4ea3c7ee3bf8acb7ea21 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Wed, 8 Jun 2011 16:38:37 +0200
Subject: [PATCH 683/830] Fix "mixed declarations and code" warnings.

---
 libavcodec/j2kdec.c   | 5 ++---
 libavcodec/qtrleenc.c | 6 ++++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/libavcodec/j2kdec.c b/libavcodec/j2kdec.c
index 2497a2e015..73af6a73df 100644
--- a/libavcodec/j2kdec.c
+++ b/libavcodec/j2kdec.c
@@ -692,6 +692,8 @@ static int decode_cblk(J2kDecoderContext *s, J2kCodingStyle *codsty, J2kT1Contex
                        int width, int height, int bandpos)
 {
     int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1, y, clnpass_cnt = 0;
+    int bpass_csty_symbol = J2K_CBLK_BYPASS & codsty->cblk_style;
+    int vert_causal_ctx_csty_symbol = J2K_CBLK_VSC & codsty->cblk_style;
 
     for (y = 0; y < height+2; y++)
         memset(t1->flags[y], 0, (width+2)*sizeof(int));
@@ -703,9 +705,6 @@ static int decode_cblk(J2kDecoderContext *s, J2kCodingStyle *codsty, J2kT1Contex
     cblk->data[cblk->length] = 0xff;
     cblk->data[cblk->length+1] = 0xff;
 
-    int bpass_csty_symbol = J2K_CBLK_BYPASS & codsty->cblk_style;
-    int vert_causal_ctx_csty_symbol = J2K_CBLK_VSC & codsty->cblk_style;
-
     while(passno--){
         switch(pass_t){
             case 0: decode_sigpass(t1, width, height, bpno+1, bandpos,
diff --git a/libavcodec/qtrleenc.c b/libavcodec/qtrleenc.c
index d43ff7b06b..6258b143ad 100644
--- a/libavcodec/qtrleenc.c
+++ b/libavcodec/qtrleenc.c
@@ -231,10 +231,11 @@ static void qtrle_encode_line(QtrleEncContext *s, AVFrame *p, int line, uint8_t
         else if (rlecode > 0) {
             /* bulk copy */
             if (s->avctx->pix_fmt == PIX_FMT_GRAY8) {
+                int j;
                 // QT grayscale colorspace has 0=white and 255=black, we will
                 // ignore the palette that is included in the AVFrame because
                 // PIX_FMT_GRAY8 has defined color mapping
-                for (int j = 0; j < rlecode*s->pixel_size; ++j)
+                for (j = 0; j < rlecode*s->pixel_size; ++j)
                     bytestream_put_byte(buf, *(this_line + i*s->pixel_size + j) ^ 0xff);
             } else {
                 bytestream_put_buffer(buf, this_line + i*s->pixel_size, rlecode*s->pixel_size);
@@ -244,8 +245,9 @@ static void qtrle_encode_line(QtrleEncContext *s, AVFrame *p, int line, uint8_t
         else {
             /* repeat the bits */
             if (s->avctx->pix_fmt == PIX_FMT_GRAY8) {
+                int j;
                 // QT grayscale colorspace has 0=white and 255=black, ...
-                for (int j = 0; j < s->pixel_size; ++j)
+                for (j = 0; j < s->pixel_size; ++j)
                     bytestream_put_byte(buf, *(this_line + i*s->pixel_size + j) ^ 0xff);
             } else {
                 bytestream_put_buffer(buf, this_line + i*s->pixel_size, s->pixel_size);

From 9d84dfce93a8dac7a3b82adab9099312f638b58b Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 11:03:11 -0400
Subject: [PATCH 684/830] swscale: remove unused function.

Use of this wrapper was removed in a previous patch, but I
forgot to actually remove the function itself.
---
 libswscale/swscale.c | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 8f41547be1..c2259ca294 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -285,35 +285,6 @@ yuv2NBPS(10, LE, 0);
 yuv2NBPS(16, BE, 1);
 yuv2NBPS(16, LE, 0);
 
-static inline void yuv2yuvX16_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                                const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
-                                const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
-                                enum PixelFormat dstFormat)
-{
-#define conv16(bits) \
-    if (isBE(dstFormat)) { \
-        yuv2yuvX ## bits ## BE_c(c, lumFilter, lumSrc, lumFilterSize, \
-                                 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                                 alpSrc, \
-                                 dest, uDest, vDest, aDest, \
-                                 dstW, chrDstW); \
-    } else { \
-        yuv2yuvX ## bits ## LE_c(c, lumFilter, lumSrc, lumFilterSize, \
-                                 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                                 alpSrc, \
-                                 dest, uDest, vDest, aDest, \
-                                 dstW, chrDstW); \
-    }
-    if (is16BPS(dstFormat)) {
-        conv16(16);
-    } else if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
-        conv16(9);
-    } else {
-        conv16(10);
-    }
-#undef conv16
-}
-
 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                               const int16_t **lumSrc, int lumFilterSize,
                               const int16_t *chrFilter, const int16_t **chrUSrc,

From 496d95c34cbc185c04c37be3cbcef941da53989a Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 11:33:46 -0400
Subject: [PATCH 685/830] swscale: move two macros that are only used once into
 caller.

This way, they look like regular code, which is easier to
understand.
---
 libswscale/x86/swscale_template.c | 66 ++++++++++++++-----------------
 1 file changed, 30 insertions(+), 36 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 3646ccceff..2c79d12ab7 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -171,19 +171,6 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
     YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
 }
 
-#define YSCALEYUV2YV121 \
-    "mov %2, %%"REG_a"                    \n\t"\
-    ".p2align               4             \n\t" /* FIXME Unroll? */\
-    "1:                                   \n\t"\
-    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
-    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
-    "psraw                 $7, %%mm0      \n\t"\
-    "psraw                 $7, %%mm1      \n\t"\
-    "packuswb           %%mm1, %%mm0      \n\t"\
-    MOVNTQ(%%mm0, (%1, %%REGa))\
-    "add                   $8, %%"REG_a"  \n\t"\
-    "jnc                   1b             \n\t"
-
 static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
                                     const int16_t *chrUSrc, const int16_t *chrVSrc,
                                     const int16_t *alpSrc,
@@ -198,33 +185,25 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
     while (p--) {
         if (dst[p]) {
             __asm__ volatile(
-               YSCALEYUV2YV121
-               :: "r" (src[p]), "r" (dst[p] + counter[p]),
-                  "g" (-counter[p])
-               : "%"REG_a
+                "mov %2, %%"REG_a"                    \n\t"
+                ".p2align               4             \n\t" /* FIXME Unroll? */
+                "1:                                   \n\t"
+                "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"
+                "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"
+                "psraw                 $7, %%mm0      \n\t"
+                "psraw                 $7, %%mm1      \n\t"
+                "packuswb           %%mm1, %%mm0      \n\t"
+                MOVNTQ(%%mm0, (%1, %%REGa))
+                "add                   $8, %%"REG_a"  \n\t"
+                "jnc                   1b             \n\t"
+                :: "r" (src[p]), "r" (dst[p] + counter[p]),
+                   "g" (-counter[p])
+                : "%"REG_a
             );
         }
     }
 }
 
-#define YSCALEYUV2YV121_ACCURATE \
-    "mov %2, %%"REG_a"                    \n\t"\
-    "pcmpeqw %%mm7, %%mm7                 \n\t"\
-    "psrlw                 $15, %%mm7     \n\t"\
-    "psllw                  $6, %%mm7     \n\t"\
-    ".p2align                4            \n\t" /* FIXME Unroll? */\
-    "1:                                   \n\t"\
-    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
-    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
-    "paddsw             %%mm7, %%mm0      \n\t"\
-    "paddsw             %%mm7, %%mm1      \n\t"\
-    "psraw                 $7, %%mm0      \n\t"\
-    "psraw                 $7, %%mm1      \n\t"\
-    "packuswb           %%mm1, %%mm0      \n\t"\
-    MOVNTQ(%%mm0, (%1, %%REGa))\
-    "add                   $8, %%"REG_a"  \n\t"\
-    "jnc                   1b             \n\t"
-
 static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
                                        const int16_t *chrUSrc, const int16_t *chrVSrc,
                                        const int16_t *alpSrc,
@@ -239,7 +218,22 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
     while (p--) {
         if (dst[p]) {
             __asm__ volatile(
-                YSCALEYUV2YV121_ACCURATE
+                "mov %2, %%"REG_a"                    \n\t"
+                "pcmpeqw %%mm7, %%mm7                 \n\t"
+                "psrlw                 $15, %%mm7     \n\t"
+                "psllw                  $6, %%mm7     \n\t"
+                ".p2align                4            \n\t" /* FIXME Unroll? */
+                "1:                                   \n\t"
+                "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"
+                "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"
+                "paddsw             %%mm7, %%mm0      \n\t"
+                "paddsw             %%mm7, %%mm1      \n\t"
+                "psraw                 $7, %%mm0      \n\t"
+                "psraw                 $7, %%mm1      \n\t"
+                "packuswb           %%mm1, %%mm0      \n\t"
+                MOVNTQ(%%mm0, (%1, %%REGa))
+                "add                   $8, %%"REG_a"  \n\t"
+                "jnc                   1b             \n\t"
                 :: "r" (src[p]), "r" (dst[p] + counter[p]),
                    "g" (-counter[p])
                 : "%"REG_a

From 9bcbb250e23959075765edd3cb4c1fcb46736d7d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 11:39:26 -0400
Subject: [PATCH 686/830] swscale: fix types of assembly arguments.

This prevents the following compiler warnings: "warning:
initialization from incompatible pointer type". Since the
variables are only ever used in inline assembly, their type
is actually irrelevant (so the part where it was wrong did
not invoke any buggy behaviour).
---
 libswscale/x86/swscale_template.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 2c79d12ab7..8eb18050a2 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -178,7 +178,7 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
                                     uint8_t *aDest, int dstW, int chrDstW)
 {
     int p= 4;
-    const uint8_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
+    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
 
@@ -211,7 +211,7 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
                                        uint8_t *aDest, int dstW, int chrDstW)
 {
     int p= 4;
-    const uint8_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
+    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
 
@@ -2046,7 +2046,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
                                         int dstWidth, const uint8_t *src, int srcW,
                                         int xInc)
 {
-    int32_t *filterPos = c->hLumFilterPos;
+    int16_t *filterPos = c->hLumFilterPos;
     int16_t *filter    = c->hLumFilter;
     void    *mmx2FilterCode= c->lumMmx2FilterCode;
     int i;
@@ -2118,7 +2118,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d
                                         int dstWidth, const uint8_t *src1,
                                         const uint8_t *src2, int srcW, int xInc)
 {
-    int32_t *filterPos = c->hChrFilterPos;
+    int16_t *filterPos = c->hChrFilterPos;
     int16_t *filter    = c->hChrFilter;
     void    *mmx2FilterCode= c->chrMmx2FilterCode;
     int i;

From 47d2ca3205b53665328fe301879c339449db7a1d Mon Sep 17 00:00:00 2001
From: Mina Nagy Zaki <mnzaki@gmail.com>
Date: Tue, 7 Jun 2011 17:42:32 +0300
Subject: [PATCH 687/830] lavfi: handle NULL lists in avfilter_make_format_list

---
 libavfilter/avfilter.h |  5 +++--
 libavfilter/formats.c  | 13 ++++++++-----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 541dbe7aa7..ac954ca198 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -27,7 +27,7 @@
 
 #define LIBAVFILTER_VERSION_MAJOR  2
 #define LIBAVFILTER_VERSION_MINOR 14
-#define LIBAVFILTER_VERSION_MICRO  0
+#define LIBAVFILTER_VERSION_MICRO  1
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \
@@ -233,7 +233,8 @@ typedef struct AVFilterFormats {
  * Create a list of supported formats. This is intended for use in
  * AVFilter->query_formats().
  *
- * @param fmts list of media formats, terminated by -1
+ * @param fmts list of media formats, terminated by -1. If NULL an
+ *        empty list is created.
  * @return the format list, with no existing references
  */
 AVFilterFormats *avfilter_make_format_list(const int *fmts);
diff --git a/libavfilter/formats.c b/libavfilter/formats.c
index 101ef09e5f..ec7fca3817 100644
--- a/libavfilter/formats.c
+++ b/libavfilter/formats.c
@@ -73,15 +73,18 @@ AVFilterFormats *avfilter_merge_formats(AVFilterFormats *a, AVFilterFormats *b)
 AVFilterFormats *avfilter_make_format_list(const int *fmts)
 {
     AVFilterFormats *formats;
-    int count;
+    int count = 0;
 
-    for (count = 0; fmts[count] != -1; count++)
-        ;
+    if (fmts)
+        for (count = 0; fmts[count] != -1; count++)
+            ;
 
     formats               = av_mallocz(sizeof(AVFilterFormats));
-    formats->formats      = av_malloc(sizeof(*formats->formats) * count);
     formats->format_count = count;
-    memcpy(formats->formats, fmts, sizeof(*formats->formats) * count);
+    if (count) {
+        formats->formats  = av_malloc(sizeof(*formats->formats) * count);
+        memcpy(formats->formats, fmts, sizeof(*formats->formats) * count);
+    }
 
     return formats;
 }

From dbaba52ed25f79c8be0ce1a478b229bf868f8c11 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 6 Jun 2011 13:14:05 +0200
Subject: [PATCH 688/830] lavf: don't try to free private options if priv_data
 is NULL.

This might happen if there was an error before priv_data was allocated
and result in segfault.
---
 libavformat/utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index 8ab59e1d3b..c198af07ed 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2551,7 +2551,7 @@ void avformat_free_context(AVFormatContext *s)
     AVStream *st;
 
     av_opt_free(s);
-    if (s->iformat && s->iformat->priv_class)
+    if (s->iformat && s->iformat->priv_class && s->priv_data)
         av_opt_free(s->priv_data);
 
     for(i=0;i<s->nb_streams;i++) {

From be20528ced76a43aafed032742b3ddd1897bf55e Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Tue, 7 Jun 2011 11:09:58 -0700
Subject: [PATCH 689/830] aac: fix adts frame size mask, fix demuxer probing
 for some files.

---
 libavformat/aacdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/aacdec.c b/libavformat/aacdec.c
index 47e9bf3a18..a224c96fba 100644
--- a/libavformat/aacdec.c
+++ b/libavformat/aacdec.c
@@ -44,7 +44,7 @@ static int adts_aac_probe(AVProbeData *p)
             uint32_t header = AV_RB16(buf2);
             if((header&0xFFF6) != 0xFFF0)
                 break;
-            fsize = (AV_RB32(buf2+3)>>13) & 0x8FFF;
+            fsize = (AV_RB32(buf2+3)>>13) & 0x1FFF;
             if(fsize < 7)
                 break;
             buf2 += fsize;

From a71bcd1a7f66e210971c44452dc4cdae7bdbd98a Mon Sep 17 00:00:00 2001
From: Etienne Buira <etienne.buira.lists@free.fr>
Date: Wed, 8 Jun 2011 02:20:53 +0200
Subject: [PATCH 690/830] crypto: Use av_freep instead of av_free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a potential double free.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/crypto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/crypto.c b/libavformat/crypto.c
index 789a4d1e76..5e7ee1eba3 100644
--- a/libavformat/crypto.c
+++ b/libavformat/crypto.c
@@ -97,8 +97,8 @@ static int crypto_open(URLContext *h, const char *uri, int flags)
 
     return 0;
 err:
-    av_free(c->key);
-    av_free(c->iv);
+    av_freep(&c->key);
+    av_freep(&c->iv);
     return ret;
 }
 

From 1fe3bf3e4b4f4a456204cc9512d0f7088231d109 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 8 Jun 2011 16:14:40 +0200
Subject: [PATCH 691/830] Revert "build: remove empty $(OBJS) target"

This reverts commit b9c6c7cb25932b594fd684a0cb553e439d49fe12.
It appears this caused the .o files to be deleted under some circumstances.
---
 common.mak | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/common.mak b/common.mak
index cff312994e..ae9698a1eb 100644
--- a/common.mak
+++ b/common.mak
@@ -58,6 +58,8 @@ HOSTCFLAGS += $(IFLAGS)
 # so this saves some time on slow systems.
 .SUFFIXES:
 
+# Do not delete intermediate files from chains of implicit rules
+$(OBJS):
 endif
 
 OBJS-$(HAVE_MMX) +=  $(MMX-OBJS-yes)

From c1ad93c08ce0b6ea2f4505267fe3aa7584701dca Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 9 Jun 2011 01:39:49 +0200
Subject: [PATCH 692/830] Revert "crypto: fix potential double free"

This reverts commit 7d89f7cbf3ccd98f9a5f58db97effa9afd2d571a.

Revert at the author's request; the reverted commit is also buggy (missing & in the av_freep() calls).
---
 libavformat/crypto.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavformat/crypto.c b/libavformat/crypto.c
index 03bfeddba4..789a4d1e76 100644
--- a/libavformat/crypto.c
+++ b/libavformat/crypto.c
@@ -97,8 +97,8 @@ static int crypto_open(URLContext *h, const char *uri, int flags)
 
     return 0;
 err:
-    av_freep(c->key);
-    av_freep(c->iv);
+    av_free(c->key);
+    av_free(c->iv);
     return ret;
 }
 
@@ -157,6 +157,8 @@ static int crypto_close(URLContext *h)
     if (c->hd)
         ffurl_close(c->hd);
     av_freep(&c->aes);
+    av_freep(&c->key);
+    av_freep(&c->iv);
     return 0;
 }
 

From c8d0d8bc767309d5e8d9ee64addc11117190338e Mon Sep 17 00:00:00 2001
From: Etienne Buira <etienne.buira.lists@free.fr>
Date: Wed, 8 Jun 2011 02:20:53 +0200
Subject: [PATCH 693/830] crypto: Use av_freep instead of av_free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a potential double free.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/crypto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/crypto.c b/libavformat/crypto.c
index 789a4d1e76..5e7ee1eba3 100644
--- a/libavformat/crypto.c
+++ b/libavformat/crypto.c
@@ -97,8 +97,8 @@ static int crypto_open(URLContext *h, const char *uri, int flags)
 
     return 0;
 err:
-    av_free(c->key);
-    av_free(c->iv);
+    av_freep(&c->key);
+    av_freep(&c->iv);
     return ret;
 }
 

From 7b8ed831eb8432d202dad16dedc1758b018bb1fa Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 9 Jun 2011 03:35:50 +0200
Subject: [PATCH 694/830] jpegdec: actually search for and parse RSTn

Fixes Ticket267

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/mjpegdec.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index afcc1b74a7..4e58feffad 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -881,9 +881,12 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, i
                 }
             }
 
-            if (s->restart_interval && !--s->restart_count) {
+            if (s->restart_interval && show_bits(&s->gb, 8) == 0xFF){/* skip RSTn */
+                --s->restart_count;
                 align_get_bits(&s->gb);
-                skip_bits(&s->gb, 16); /* skip RSTn */
+                while(show_bits(&s->gb, 8) == 0xFF)
+                    skip_bits(&s->gb, 8);
+                skip_bits(&s->gb, 8);
                 for (i=0; i<nb_components; i++) /* reset dc */
                     s->last_dc[i] = 1024;
             }

From a27db4c349574d44c581fa484f03fb1ee816aaf9 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 12:12:50 -0400
Subject: [PATCH 695/830] swscale: fix function declaration keywords in
 x86/swscale_template.c.

Remove inline keyword for functions that are only called through
their function pointers (and thus cannot be inlined); add av_cold
keyword to init function, and use av_always_inline instead of
inline for functions that must be inlined for performance reasons.
---
 libswscale/x86/swscale_template.c | 383 ++++++++++++++++--------------
 1 file changed, 202 insertions(+), 181 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 8eb18050a2..8fad257ddf 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -70,13 +70,13 @@
         : "%"REG_d, "%"REG_S\
     );
 
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
-                                    const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrUSrc,
-                                    const int16_t **chrVSrc,
-                                    int chrFilterSize, const int16_t **alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, int dstW, int chrDstW)
+static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
+                             const int16_t **lumSrc, int lumFilterSize,
+                             const int16_t *chrFilter, const int16_t **chrUSrc,
+                             const int16_t **chrVSrc,
+                             int chrFilterSize, const int16_t **alpSrc,
+                             uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                             uint8_t *aDest, int dstW, int chrDstW)
 {
     if (uDest) {
         x86_reg uv_off = c->uv_off;
@@ -151,13 +151,13 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         : "%"REG_a, "%"REG_d, "%"REG_S\
     );
 
-static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
-                                       const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrUSrc,
-                                       const int16_t **chrVSrc,
-                                       int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, int dstW, int chrDstW)
+static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc,
+                                int chrFilterSize, const int16_t **alpSrc,
+                                uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                uint8_t *aDest, int dstW, int chrDstW)
 {
     if (uDest) {
         x86_reg uv_off = c->uv_off;
@@ -171,11 +171,11 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
     YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
 }
 
-static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
-                                    const int16_t *chrUSrc, const int16_t *chrVSrc,
-                                    const int16_t *alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, int dstW, int chrDstW)
+static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
+                             const int16_t *chrUSrc, const int16_t *chrVSrc,
+                             const int16_t *alpSrc,
+                             uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                             uint8_t *aDest, int dstW, int chrDstW)
 {
     int p= 4;
     const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
@@ -204,11 +204,11 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
     }
 }
 
-static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
-                                       const int16_t *chrUSrc, const int16_t *chrVSrc,
-                                       const int16_t *alpSrc,
-                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, int dstW, int chrDstW)
+static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
+                                const int16_t *chrUSrc, const int16_t *chrVSrc,
+                                const int16_t *alpSrc,
+                                uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                uint8_t *aDest, int dstW, int chrDstW)
 {
     int p= 4;
     const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
@@ -458,12 +458,12 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
     " jb      1b                \n\t"
 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
 
-static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                          const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrUSrc,
-                                          const int16_t **chrVSrc,
-                                          int chrFilterSize, const int16_t **alpSrc,
-                                          uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                   const int16_t **lumSrc, int lumFilterSize,
+                                   const int16_t *chrFilter, const int16_t **chrUSrc,
+                                   const int16_t **chrVSrc,
+                                   int chrFilterSize, const int16_t **alpSrc,
+                                   uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -491,12 +491,12 @@ static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilte
     }
 }
 
-static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
-                                       const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrUSrc,
-                                       const int16_t **chrVSrc,
-                                       int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc,
+                                int chrFilterSize, const int16_t **alpSrc,
+                                uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -548,12 +548,12 @@ static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
     " jb             1b             \n\t"
 #define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
 
-static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                           const int16_t **lumSrc, int lumFilterSize,
-                                           const int16_t *chrFilter, const int16_t **chrUSrc,
-                                           const int16_t **chrVSrc,
-                                           int chrFilterSize, const int16_t **alpSrc,
-                                           uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc,
+                                    int chrFilterSize, const int16_t **alpSrc,
+                                    uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -572,12 +572,12 @@ static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilt
     YSCALEYUV2PACKEDX_END
 }
 
-static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
-                                        const int16_t **lumSrc, int lumFilterSize,
-                                        const int16_t *chrFilter, const int16_t **chrUSrc,
-                                        const int16_t **chrVSrc,
-                                        int chrFilterSize, const int16_t **alpSrc,
-                                        uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
+                                 const int16_t **lumSrc, int lumFilterSize,
+                                 const int16_t *chrFilter, const int16_t **chrUSrc,
+                                 const int16_t **chrVSrc,
+                                 int chrFilterSize, const int16_t **alpSrc,
+                                 uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -625,12 +625,12 @@ static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
     " jb             1b             \n\t"
 #define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
 
-static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                           const int16_t **lumSrc, int lumFilterSize,
-                                           const int16_t *chrFilter, const int16_t **chrUSrc,
-                                           const int16_t **chrVSrc,
-                                           int chrFilterSize, const int16_t **alpSrc,
-                                           uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc,
+                                    int chrFilterSize, const int16_t **alpSrc,
+                                    uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -649,12 +649,12 @@ static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilt
     YSCALEYUV2PACKEDX_END
 }
 
-static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
-                                        const int16_t **lumSrc, int lumFilterSize,
-                                        const int16_t *chrFilter, const int16_t **chrUSrc,
-                                        const int16_t **chrVSrc,
-                                        int chrFilterSize, const int16_t **alpSrc,
-                                        uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
+                                 const int16_t **lumSrc, int lumFilterSize,
+                                 const int16_t *chrFilter, const int16_t **chrUSrc,
+                                 const int16_t **chrVSrc,
+                                 int chrFilterSize, const int16_t **alpSrc,
+                                 uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -782,12 +782,12 @@ static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
 #define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
 #endif
 
-static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                          const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrUSrc,
-                                          const int16_t **chrVSrc,
-                                          int chrFilterSize, const int16_t **alpSrc,
-                                          uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                   const int16_t **lumSrc, int lumFilterSize,
+                                   const int16_t *chrFilter, const int16_t **chrUSrc,
+                                   const int16_t **chrVSrc,
+                                   int chrFilterSize, const int16_t **alpSrc,
+                                   uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -806,12 +806,12 @@ static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilte
     );
 }
 
-static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
-                                       const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrUSrc,
-                                       const int16_t **chrVSrc,
-                                       int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc,
+                                int chrFilterSize, const int16_t **alpSrc,
+                                uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -847,12 +847,12 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
     " jb          1b            \n\t"
 #define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
 
-static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                            const int16_t **lumSrc, int lumFilterSize,
-                                            const int16_t *chrFilter, const int16_t **chrUSrc,
-                                            const int16_t **chrVSrc,
-                                            int chrFilterSize, const int16_t **alpSrc,
-                                            uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                     const int16_t **lumSrc, int lumFilterSize,
+                                     const int16_t *chrFilter, const int16_t **chrUSrc,
+                                     const int16_t **chrVSrc,
+                                     int chrFilterSize, const int16_t **alpSrc,
+                                     uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -868,12 +868,12 @@ static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFil
     YSCALEYUV2PACKEDX_END
 }
 
-static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
-                                         const int16_t **lumSrc, int lumFilterSize,
-                                         const int16_t *chrFilter, const int16_t **chrUSrc,
-                                         const int16_t **chrVSrc,
-                                         int chrFilterSize, const int16_t **alpSrc,
-                                         uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
+                                  const int16_t **lumSrc, int lumFilterSize,
+                                  const int16_t *chrFilter, const int16_t **chrUSrc,
+                                  const int16_t **chrVSrc,
+                                  int chrFilterSize, const int16_t **alpSrc,
+                                  uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -969,12 +969,12 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
 /**
  * vertical bilinear scale YV12 to RGB
  */
-static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *buf1, const uint16_t *ubuf0,
-                                       const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                       const uint16_t *vbuf1, const uint16_t *abuf0,
-                                       const uint16_t *abuf1, uint8_t *dest,
-                                       int dstW, int yalpha, int uvalpha, int y)
+static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
+                                const uint16_t *buf1, const uint16_t *ubuf0,
+                                const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                const uint16_t *vbuf1, const uint16_t *abuf0,
+                                const uint16_t *abuf1, uint8_t *dest,
+                                int dstW, int yalpha, int uvalpha, int y)
 {
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
@@ -1031,12 +1031,12 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *buf1, const uint16_t *ubuf0,
-                                       const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                       const uint16_t *vbuf1, const uint16_t *abuf0,
-                                       const uint16_t *abuf1, uint8_t *dest,
-                                       int dstW, int yalpha, int uvalpha, int y)
+static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
+                                const uint16_t *buf1, const uint16_t *ubuf0,
+                                const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                const uint16_t *vbuf1, const uint16_t *abuf0,
+                                const uint16_t *abuf1, uint8_t *dest,
+                                int dstW, int yalpha, int uvalpha, int y)
 {
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
@@ -1053,12 +1053,12 @@ static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
     );
 }
 
-static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *buf1, const uint16_t *ubuf0,
-                                        const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                        const uint16_t *vbuf1, const uint16_t *abuf0,
-                                        const uint16_t *abuf1, uint8_t *dest,
-                                        int dstW, int yalpha, int uvalpha, int y)
+static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *buf1, const uint16_t *ubuf0,
+                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                 const uint16_t *vbuf1, const uint16_t *abuf0,
+                                 const uint16_t *abuf1, uint8_t *dest,
+                                 int dstW, int yalpha, int uvalpha, int y)
 {
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
@@ -1081,12 +1081,12 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
     );
 }
 
-static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *buf1, const uint16_t *ubuf0,
-                                        const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                        const uint16_t *vbuf1, const uint16_t *abuf0,
-                                        const uint16_t *abuf1, uint8_t *dest,
-                                        int dstW, int yalpha, int uvalpha, int y)
+static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *buf1, const uint16_t *ubuf0,
+                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                 const uint16_t *vbuf1, const uint16_t *abuf0,
+                                 const uint16_t *abuf1, uint8_t *dest,
+                                 int dstW, int yalpha, int uvalpha, int y)
 {
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
@@ -1149,12 +1149,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
 
 #define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
 
-static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
-                                         const uint16_t *buf1, const uint16_t *ubuf0,
-                                         const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                         const uint16_t *vbuf1, const uint16_t *abuf0,
-                                         const uint16_t *abuf1, uint8_t *dest,
-                                         int dstW, int yalpha, int uvalpha, int y)
+static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
+                                  const uint16_t *buf1, const uint16_t *ubuf0,
+                                  const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                  const uint16_t *vbuf1, const uint16_t *abuf0,
+                                  const uint16_t *abuf1, uint8_t *dest,
+                                  int dstW, int yalpha, int uvalpha, int y)
 {
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
@@ -1288,12 +1288,12 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
 /**
  * YV12 to RGB without scaling or interpolating
  */
-static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                       const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest,
-                                       int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                       int flags, int y)
+static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
+                                const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                const uint16_t *abuf0, uint8_t *dest,
+                                int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                int flags, int y)
 {
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
@@ -1356,12 +1356,12 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                       const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest,
-                                       int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                       int flags, int y)
+static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
+                                const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                const uint16_t *abuf0, uint8_t *dest,
+                                int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                int flags, int y)
 {
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
@@ -1394,12 +1394,12 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                        const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                        const uint16_t *abuf0, uint8_t *dest,
-                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                        int flags, int y)
+static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                 const uint16_t *abuf0, uint8_t *dest,
+                                 int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                 int flags, int y)
 {
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
@@ -1444,12 +1444,12 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                        const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                        const uint16_t *abuf0, uint8_t *dest,
-                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                        int flags, int y)
+static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                 const uint16_t *abuf0, uint8_t *dest,
+                                 int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                 int flags, int y)
 {
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
@@ -1531,12 +1531,12 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "psraw                $7, %%mm7     \n\t"
 #define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
 
-static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
-                                         const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                         const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                         const uint16_t *abuf0, uint8_t *dest,
-                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                         int flags, int y)
+static void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
+                                  const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                  const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                  const uint16_t *abuf0, uint8_t *dest,
+                                  int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                  int flags, int y)
 {
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
@@ -1570,7 +1570,8 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
 #if !COMPILE_TEMPLATE_MMX2
 //FIXME yuy2* can read up to 7 samples too much
 
-static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src,
+                            int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm2           \n\t"
@@ -1589,7 +1590,9 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, int width,
     );
 }
 
-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1615,7 +1618,9 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV,
+                           const uint8_t *src1, const uint8_t *src2,
+                           int width, uint32_t *unused)
 {
     __asm__ volatile(
         "mov                    %0, %%"REG_a"       \n\t"
@@ -1641,7 +1646,8 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src,
+                            int width, uint32_t *unused)
 {
     __asm__ volatile(
         "mov                  %0, %%"REG_a"         \n\t"
@@ -1659,7 +1665,9 @@ static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, int width,
     );
 }
 
-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1685,7 +1693,9 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV,
+                           const uint8_t *src1, const uint8_t *src2,
+                           int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1710,8 +1720,8 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
     );
 }
 
-static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
-                                    const uint8_t *src, int width)
+static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
+                                              const uint8_t *src, int width)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1736,22 +1746,23 @@ static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
     );
 }
 
-static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
-                                    const uint8_t *src1, const uint8_t *src2,
-                                    int width, uint32_t *unused)
+static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int width, uint32_t *unused)
 {
     RENAME(nvXXtoUV)(dstU, dstV, src1, width);
 }
 
-static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
-                                    const uint8_t *src1, const uint8_t *src2,
-                                    int width, uint32_t *unused)
+static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int width, uint32_t *unused)
 {
     RENAME(nvXXtoUV)(dstV, dstU, src1, width);
 }
 #endif /* !COMPILE_TEMPLATE_MMX2 */
 
-static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, int width, enum PixelFormat srcFormat)
+static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src,
+                                                  int width, enum PixelFormat srcFormat)
 {
 
     if(srcFormat == PIX_FMT_BGR24) {
@@ -1804,7 +1815,21 @@ static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, int wi
     );
 }
 
-static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, int width, enum PixelFormat srcFormat)
+static void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src,
+                             int width, uint32_t *unused)
+{
+    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
+}
+
+static void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src,
+                             int width, uint32_t *unused)
+{
+    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
+}
+
+static av_always_inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV,
+                                                   const uint8_t *src, int width,
+                                                   enum PixelFormat srcFormat)
 {
     __asm__ volatile(
         "movq                    24(%4), %%mm6       \n\t"
@@ -1862,23 +1887,17 @@ static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uin
     );
 }
 
-static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
-{
-    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
-}
-
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV,
+                              const uint8_t *src1, const uint8_t *src2,
+                              int width, uint32_t *unused)
 {
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
     assert(src1 == src2);
 }
 
-static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
-{
-    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
-}
-
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV,
+                              const uint8_t *src1, const uint8_t *src2,
+                              int width, uint32_t *unused)
 {
     assert(src1==src2);
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
@@ -1886,8 +1905,10 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
 
 #if !COMPILE_TEMPLATE_MMX2
 // bilinear / bicubic scaling
-static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
-                                  const int16_t *filter, const int16_t *filterPos, int filterSize)
+static void RENAME(hScale)(int16_t *dst, int dstW,
+                           const uint8_t *src, int srcW,
+                           int xInc, const int16_t *filter,
+                           const int16_t *filterPos, int filterSize)
 {
     assert(filterSize % 4 == 0 && filterSize>0);
     if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
@@ -2042,9 +2063,9 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
 #endif /* !COMPILE_TEMPLATE_MMX2 */
 
 #if COMPILE_TEMPLATE_MMX2
-static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
-                                        int dstWidth, const uint8_t *src, int srcW,
-                                        int xInc)
+static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
+                                 int dstWidth, const uint8_t *src,
+                                 int srcW, int xInc)
 {
     int16_t *filterPos = c->hLumFilterPos;
     int16_t *filter    = c->hLumFilter;
@@ -2114,9 +2135,9 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
         dst[i] = src[srcW-1]*128;
 }
 
-static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                        int dstWidth, const uint8_t *src1,
-                                        const uint8_t *src2, int srcW, int xInc)
+static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
+                                 int dstWidth, const uint8_t *src1,
+                                 const uint8_t *src2, int srcW, int xInc)
 {
     int16_t *filterPos = c->hChrFilterPos;
     int16_t *filter    = c->hChrFilter;
@@ -2177,7 +2198,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d
 }
 #endif /* COMPILE_TEMPLATE_MMX2 */
 
-static void RENAME(sws_init_swScale)(SwsContext *c)
+static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
 {
     enum PixelFormat srcFormat = c->srcFormat,
                      dstFormat = c->dstFormat;

From 6e5a8d3c9affbb242e39cea29bd44003361504d4 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 12:31:11 -0400
Subject: [PATCH 696/830] swscale: fix function declarations in swscale.c.

Remove inline keyword from functions that are never inlined.
Use av_always_inline for functions that should be force-inlined
for performance reasons. Use av_cold for init functions.
---
 libswscale/swscale.c | 67 ++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 33 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index c2259ca294..b6561408e6 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -285,13 +285,13 @@ yuv2NBPS(10, LE, 0);
 yuv2NBPS(16, BE, 1);
 yuv2NBPS(16, LE, 0);
 
-static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
-                              const int16_t **lumSrc, int lumFilterSize,
-                              const int16_t *chrFilter, const int16_t **chrUSrc,
-                              const int16_t **chrVSrc,
-                              int chrFilterSize, const int16_t **alpSrc,
-                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, int dstW, int chrDstW)
+static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
+                       const int16_t **lumSrc, int lumFilterSize,
+                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                       const int16_t **chrVSrc,
+                       int chrFilterSize, const int16_t **alpSrc,
+                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                       uint8_t *aDest, int dstW, int chrDstW)
 {
     //FIXME Optimize (just quickly written not optimized..)
     int i;
@@ -327,16 +327,15 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
 
             aDest[i]= av_clip_uint8(val>>19);
         }
-
 }
 
-static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
-                               const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter, const int16_t **chrUSrc,
-                               const int16_t **chrVSrc, int chrFilterSize,
-                               const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
-                               uint8_t *vDest, uint8_t *aDest,
-                               int dstW, int chrDstW)
+static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
+                        const int16_t **lumSrc, int lumFilterSize,
+                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                        const int16_t **chrVSrc, int chrFilterSize,
+                        const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
+                        uint8_t *vDest, uint8_t *aDest,
+                        int dstW, int chrDstW)
 {
     enum PixelFormat dstFormat = c->dstFormat;
 
@@ -857,11 +856,11 @@ static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
 }
 
-static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
-                                   const int16_t **lumSrc, int lumFilterSize,
-                                   const int16_t *chrFilter, const int16_t **chrUSrc,
-                                   const int16_t **chrVSrc, int chrFilterSize,
-                                   const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
+static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
+                            const int16_t **lumSrc, int lumFilterSize,
+                            const int16_t *chrFilter, const int16_t **chrUSrc,
+                            const int16_t **chrVSrc, int chrFilterSize,
+                            const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
     int step= c->dstFormatBpp/8;
@@ -943,7 +942,9 @@ static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
     }
 }
 
-static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
+static av_always_inline void fillPlane(uint8_t* plane, int stride,
+                                       int width, int height,
+                                       int y, uint8_t val)
 {
     int i;
     uint8_t *ptr = plane + stride*y;
@@ -1469,12 +1470,12 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
 }
 
 // *** horizontal scale Y line to temp buffer
-static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
-                           const uint8_t *src, int srcW, int xInc,
-                           const int16_t *hLumFilter,
-                           const int16_t *hLumFilterPos, int hLumFilterSize,
-                           uint8_t *formatConvBuffer,
-                           uint32_t *pal, int isAlpha)
+static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
+                                     const uint8_t *src, int srcW, int xInc,
+                                     const int16_t *hLumFilter,
+                                     const int16_t *hLumFilterPos, int hLumFilterSize,
+                                     uint8_t *formatConvBuffer,
+                                     uint32_t *pal, int isAlpha)
 {
     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
     void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
@@ -1509,11 +1510,11 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
     }
 }
 
-static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
-                           const uint8_t *src1, const uint8_t *src2,
-                           int srcW, int xInc, const int16_t *hChrFilter,
-                           const int16_t *hChrFilterPos, int hChrFilterSize,
-                           uint8_t *formatConvBuffer, uint32_t *pal)
+static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
+                                     const uint8_t *src1, const uint8_t *src2,
+                                     int srcW, int xInc, const int16_t *hChrFilter,
+                                     const int16_t *hChrFilterPos, int hChrFilterSize,
+                                     uint8_t *formatConvBuffer, uint32_t *pal)
 {
     if (c->chrToYV12) {
         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
@@ -1826,7 +1827,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
     return dstY - lastDstY;
 }
 
-static void sws_init_swScale_c(SwsContext *c)
+static av_cold void sws_init_swScale_c(SwsContext *c)
 {
     enum PixelFormat srcFormat = c->srcFormat;
 

From e458b536052a35fe3b0f221ca3ccb308faa84f35 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 7 Jun 2011 11:06:44 -0400
Subject: [PATCH 697/830] swscale: merge macros that are used only once.

This reduces source code size without affecting the binary.
---
 libswscale/swscale.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index b6561408e6..98331018c0 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -381,7 +381,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }
 }
 
-#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
+#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) {\
         int j;\
         int Y1 = 1<<18;\
@@ -413,10 +413,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             }\
             A1>>=19;\
             A2>>=19;\
-        }
-
-#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
-        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
+        }\
         if ((Y1|Y2|U|V)&256) {\
             if (Y1>255)   Y1=255; \
             else if (Y1<0)Y1=0;   \
@@ -432,7 +429,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             A2=av_clip_uint8(A2);\
         }
 
-#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
+#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
     for (i=0; i<dstW; i++) {\
         int j;\
         int Y = 0;\
@@ -458,10 +455,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             A >>=19;\
             if (A&256)\
                 A = av_clip_uint8(A);\
-        }
-
-#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
-    YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
+        }\
         Y-= c->yuv2rgb_y_offset;\
         Y*= c->yuv2rgb_y_coeff;\
         Y+= rnd;\

From f1e0b90c640235916d5b2f98a3b2d5a9975b740f Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 7 Jun 2011 11:22:54 -0400
Subject: [PATCH 698/830] swscale: use standard clipping functions.

This generates better code on some non-x86 architectures.
---
 libswscale/swscale.c | 53 +++++++++++++++++---------------------------
 1 file changed, 20 insertions(+), 33 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 98331018c0..9d5a373375 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -404,6 +404,12 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         Y2>>=19;\
         U >>=19;\
         V >>=19;\
+        if ((Y1|Y2|U|V)&0x100) {\
+            Y1 = av_clip_uint8(Y1); \
+            Y2 = av_clip_uint8(Y2); \
+            U  = av_clip_uint8(U); \
+            V  = av_clip_uint8(V); \
+        }\
         if (alpha) {\
             A1 = 1<<18;\
             A2 = 1<<18;\
@@ -413,20 +419,10 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             }\
             A1>>=19;\
             A2>>=19;\
-        }\
-        if ((Y1|Y2|U|V)&256) {\
-            if (Y1>255)   Y1=255; \
-            else if (Y1<0)Y1=0;   \
-            if (Y2>255)   Y2=255; \
-            else if (Y2<0)Y2=0;   \
-            if (U>255)    U=255;  \
-            else if (U<0) U=0;    \
-            if (V>255)    V=255;  \
-            else if (V<0) V=0;    \
-        }\
-        if (alpha && ((A1|A2)&256)) {\
-            A1=av_clip_uint8(A1);\
-            A2=av_clip_uint8(A2);\
+            if ((A1|A2)&0x100) {\
+                A1 = av_clip_uint8(A1); \
+                A2 = av_clip_uint8(A2); \
+            }\
         }
 
 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
@@ -453,7 +449,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             for (j=0; j<lumFilterSize; j++)\
                 A += alpSrc[j][i     ] * lumFilter[j];\
             A >>=19;\
-            if (A&256)\
+            if (A&0x100)\
                 A = av_clip_uint8(A);\
         }\
         Y-= c->yuv2rgb_y_offset;\
@@ -463,12 +459,9 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
         B= Y +                          U*c->yuv2rgb_u2b_coeff;\
         if ((R|G|B)&(0xC0000000)) {\
-            if (R>=(256<<22))   R=(256<<22)-1; \
-            else if (R<0)R=0;   \
-            if (G>=(256<<22))   G=(256<<22)-1; \
-            else if (G<0)G=0;   \
-            if (B>=(256<<22))   B=(256<<22)-1; \
-            else if (B<0)B=0;   \
+            R = av_clip_uintp2(R, 30); \
+            G = av_clip_uintp2(G, 30); \
+            B = av_clip_uintp2(B, 30); \
         }
 
 #define YSCALE_YUV_2_GRAY16_C \
@@ -476,8 +469,6 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         int j;\
         int Y1 = 1<<18;\
         int Y2 = 1<<18;\
-        int U  = 1<<18;\
-        int V  = 1<<18;\
         \
         const int i2= 2*i;\
         \
@@ -487,11 +478,9 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }\
         Y1>>=11;\
         Y2>>=11;\
-        if ((Y1|Y2|U|V)&65536) {\
-            if (Y1>65535)   Y1=65535; \
-            else if (Y1<0)Y1=0;   \
-            if (Y2>65535)   Y2=65535; \
-            else if (Y2<0)Y2=0;   \
+        if ((Y1|Y2)&0x1000) {\
+            Y1 = av_clip_uint16(Y1); \
+            Y2 = av_clip_uint16(Y2); \
         }
 
 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
@@ -604,11 +593,9 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }\
         Y1>>=19;\
         Y2>>=19;\
-        if ((Y1|Y2)&256) {\
-            if (Y1>255)   Y1=255;\
-            else if (Y1<0)Y1=0;\
-            if (Y2>255)   Y2=255;\
-            else if (Y2<0)Y2=0;\
+        if ((Y1|Y2)&0x100) {\
+            Y1 = av_clip_uint8(Y1); \
+            Y2 = av_clip_uint8(Y2); \
         }\
         acc+= acc + g[Y1+d128[(i+0)&7]];\
         acc+= acc + g[Y2+d128[(i+1)&7]];\

From aa39f5f6d61c8c2640dd39520419264ffa1850de Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 13:29:09 -0400
Subject: [PATCH 699/830] swscale: extract gray16 output functions from
 yuv2packed[12X]().

This is part of the Great Evil Plan to simplify swscale.
---
 libswscale/swscale.c | 197 +++++++++++++++++++++++++++++++------------
 1 file changed, 142 insertions(+), 55 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 9d5a373375..7398043e9e 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -381,6 +381,126 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }
 }
 
+static av_always_inline void
+yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                        const int16_t **lumSrc, int lumFilterSize,
+                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                        const int16_t **chrVSrc, int chrFilterSize,
+                        const int16_t **alpSrc, uint8_t *dest, int dstW,
+                        int y, enum PixelFormat target)
+{
+    int i;
+
+#define output_pixel(pos, val) \
+        if (target == PIX_FMT_GRAY16BE) { \
+            AV_WB16(pos, val); \
+        } else { \
+            AV_WL16(pos, val); \
+        }
+    for (i = 0; i < (dstW >> 1); i++) {
+        int j;
+        int Y1 = 1 << 18;
+        int Y2 = 1 << 18;
+        const int i2 = 2 * i;
+
+        for (j = 0; j < lumFilterSize; j++) {
+            Y1 += lumSrc[j][i2]   * lumFilter[j];
+            Y2 += lumSrc[j][i2+1] * lumFilter[j];
+        }
+        Y1 >>= 11;
+        Y2 >>= 11;
+        if ((Y1 | Y2) & 0x10000) {
+            Y1 = av_clip_uint16(Y1);
+            Y2 = av_clip_uint16(Y2);
+        }
+        output_pixel(&dest[2 * i2 + 0], Y1);
+        output_pixel(&dest[2 * i2 + 2], Y2);
+    }
+}
+
+static av_always_inline void
+yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
+                        const uint16_t *buf1, const uint16_t *ubuf0,
+                        const uint16_t *ubuf1, const uint16_t *vbuf0,
+                        const uint16_t *vbuf1, const uint16_t *abuf0,
+                        const uint16_t *abuf1, uint8_t *dest, int dstW,
+                        int yalpha, int uvalpha, int y,
+                        enum PixelFormat target)
+{
+    int  yalpha1 = 4095 - yalpha; \
+    int i;
+
+    for (i = 0; i < (dstW >> 1); i++) {
+        const int i2 = 2 * i;
+        int Y1 = (buf0[i2  ] * yalpha1 + buf1[i2  ] * yalpha) >> 11;
+        int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
+
+        output_pixel(&dest[2 * i2 + 0], Y1);
+        output_pixel(&dest[2 * i2 + 2], Y2);
+    }
+}
+
+static av_always_inline void
+yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
+                        const uint16_t *ubuf0, const uint16_t *ubuf1,
+                        const uint16_t *vbuf0, const uint16_t *vbuf1,
+                        const uint16_t *abuf0, uint8_t *dest, int dstW,
+                        int uvalpha, enum PixelFormat dstFormat,
+                        int flags, int y, enum PixelFormat target)
+{
+    int i;
+
+    for (i = 0; i < (dstW >> 1); i++) {
+        const int i2 = 2 * i;
+        int Y1 = buf0[i2  ] << 1;
+        int Y2 = buf0[i2+1] << 1;
+
+        output_pixel(&dest[2 * i2 + 0], Y1);
+        output_pixel(&dest[2 * i2 + 2], Y2);
+    }
+#undef output_pixel
+}
+
+#define YUV2PACKEDWRAPPER(name, ext, fmt) \
+static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
+                        const int16_t **lumSrc, int lumFilterSize, \
+                        const int16_t *chrFilter, const int16_t **chrUSrc, \
+                        const int16_t **chrVSrc, int chrFilterSize, \
+                        const int16_t **alpSrc, uint8_t *dest, int dstW, \
+                        int y) \
+{ \
+    name ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
+                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                          alpSrc, dest, dstW, y, fmt); \
+} \
+ \
+static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
+                        const uint16_t *buf1, const uint16_t *ubuf0, \
+                        const uint16_t *ubuf1, const uint16_t *vbuf0, \
+                        const uint16_t *vbuf1, const uint16_t *abuf0, \
+                        const uint16_t *abuf1, uint8_t *dest, int dstW, \
+                        int yalpha, int uvalpha, int y) \
+{ \
+    name ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
+                          vbuf0, vbuf1, abuf0, abuf1, \
+                          dest, dstW, yalpha, uvalpha, y, fmt); \
+} \
+ \
+static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
+                        const uint16_t *ubuf0, const uint16_t *ubuf1, \
+                        const uint16_t *vbuf0, const uint16_t *vbuf1, \
+                        const uint16_t *abuf0, uint8_t *dest, int dstW, \
+                        int uvalpha, enum PixelFormat dstFormat, \
+                        int flags, int y) \
+{ \
+    name ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
+                          vbuf1, abuf0, dest, dstW, uvalpha, \
+                          dstFormat, flags, y, fmt); \
+}
+
+YUV2PACKEDWRAPPER(yuv2gray16, LE, PIX_FMT_GRAY16LE);
+YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
+
 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) {\
         int j;\
@@ -464,25 +584,6 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             B = av_clip_uintp2(B, 30); \
         }
 
-#define YSCALE_YUV_2_GRAY16_C \
-    for (i=0; i<(dstW>>1); i++) {\
-        int j;\
-        int Y1 = 1<<18;\
-        int Y2 = 1<<18;\
-        \
-        const int i2= 2*i;\
-        \
-        for (j=0; j<lumFilterSize; j++) {\
-            Y1 += lumSrc[j][i2] * lumFilter[j];\
-            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
-        }\
-        Y1>>=11;\
-        Y2>>=11;\
-        if ((Y1|Y2)&0x1000) {\
-            Y1 = av_clip_uint16(Y1); \
-            Y2 = av_clip_uint16(Y2); \
-        }
-
 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
     YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
     r = (type *)c->table_rV[V];   \
@@ -503,12 +604,6 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
         }
 
-#define YSCALE_YUV_2_GRAY16_2_C   \
-    for (i=0; i<(dstW>>1); i++) { \
-        const int i2= 2*i;       \
-        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
-        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
-
 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
     YSCALE_YUV_2_PACKED2_C(type,alpha)\
     r = (type *)c->table_rV[V];\
@@ -529,12 +624,6 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             A2= abuf0[i2+1]>>7;\
         }
 
-#define YSCALE_YUV_2_GRAY16_1_C \
-    for (i=0; i<(dstW>>1); i++) {\
-        const int i2= 2*i;\
-        int Y1= buf0[i2  ]<<1;\
-        int Y2= buf0[i2+1]<<1;
-
 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
     YSCALE_YUV_2_PACKED1_C(type,alpha)\
     r = (type *)c->table_rV[V];\
@@ -605,7 +694,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }\
     }
 
-#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
+#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_monoblack)\
     switch(c->dstFormat) {\
     case PIX_FMT_RGB48BE:\
     case PIX_FMT_RGB48LE:\
@@ -809,22 +898,6 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             ((uint8_t*)dest)[2*i2+3]= Y2;\
         }                \
         break;\
-    case PIX_FMT_GRAY16BE:\
-        func_g16\
-            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
-            ((uint8_t*)dest)[2*i2+1]= Y1;\
-            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
-            ((uint8_t*)dest)[2*i2+3]= Y2;\
-        }                \
-        break;\
-    case PIX_FMT_GRAY16LE:\
-        func_g16\
-            ((uint8_t*)dest)[2*i2+0]= Y1;\
-            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
-            ((uint8_t*)dest)[2*i2+2]= Y2;\
-            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
-        }                \
-        break;\
     }
 
 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
@@ -834,7 +907,7 @@ static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                           const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_MONOX_C)
 }
 
 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
@@ -1158,7 +1231,7 @@ static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
     int uvalpha1=4095-uvalpha;
     int i;
 
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
 }
 
 /**
@@ -1178,9 +1251,9 @@ static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
     const int yalpha= 4096; //FIXME ...
 
     if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
     } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
     }
 }
 
@@ -1540,9 +1613,23 @@ find_c_packed_planar_out_funcs(SwsContext *c,
     if(c->flags & SWS_FULL_CHR_H_INT) {
         *yuv2packedX = yuv2rgbX_c_full;
     } else {
-        *yuv2packed1  = yuv2packed1_c;
-        *yuv2packed2  = yuv2packed2_c;
-        *yuv2packedX  = yuv2packedX_c;
+        switch (dstFormat) {
+        case PIX_FMT_GRAY16BE:
+            *yuv2packed1 = yuv2gray16BE_1_c;
+            *yuv2packed2 = yuv2gray16BE_2_c;
+            *yuv2packedX = yuv2gray16BE_X_c;
+            break;
+        case PIX_FMT_GRAY16LE:
+            *yuv2packed1 = yuv2gray16LE_1_c;
+            *yuv2packed2 = yuv2gray16LE_2_c;
+            *yuv2packedX = yuv2gray16LE_X_c;
+            break;
+        default:
+            *yuv2packed1 = yuv2packed1_c;
+            *yuv2packed2 = yuv2packed2_c;
+            *yuv2packedX = yuv2packedX_c;
+            break;
+        }
     }
 }
 

From f30ee65700cc2def6447de09c91afa3f7ecc7639 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 14:50:49 -0400
Subject: [PATCH 700/830] swscale: change 9/10bit YUV input macros to inline
 functions.

Inline functions are slightly larger in source code, but
are easier to handle in source code editors. The binary code
generated is the same.
---
 libswscale/swscale.c | 70 ++++++++++++++++++++++++++++----------------
 1 file changed, 44 insertions(+), 26 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 7398043e9e..b505c0cec0 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1344,33 +1344,51 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
 }
 
 // FIXME Maybe dither instead.
-#define YUV_NBPS(depth, endianness, rfunc) \
-static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
-                                          const uint8_t *_srcU, const uint8_t *_srcV, \
-                                          int width, uint32_t *unused) \
-{ \
-    int i; \
-    const uint16_t *srcU = (const uint16_t*)_srcU; \
-    const uint16_t *srcV = (const uint16_t*)_srcV; \
-    for (i = 0; i < width; i++) { \
-        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
-        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
-    } \
-} \
-\
-static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
-                                         int width, uint32_t *unused) \
-{ \
-    int i; \
-    const uint16_t *srcY = (const uint16_t*)_srcY; \
-    for (i = 0; i < width; i++) \
-        dstY[i] = rfunc(&srcY[i])>>(depth-8); \
-} \
+static av_always_inline void
+yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
+                          const uint8_t *_srcU, const uint8_t *_srcV,
+                          int width, enum PixelFormat origin, int depth)
+{
+    int i;
+    const uint16_t *srcU = (const uint16_t *) _srcU;
+    const uint16_t *srcV = (const uint16_t *) _srcV;
 
-YUV_NBPS( 9, LE, AV_RL16)
-YUV_NBPS( 9, BE, AV_RB16)
-YUV_NBPS(10, LE, AV_RL16)
-YUV_NBPS(10, BE, AV_RB16)
+#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
+    for (i = 0; i < width; i++) {
+        dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
+        dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
+    }
+}
+
+static av_always_inline void
+yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
+                         int width, enum PixelFormat origin, int depth)
+{
+    int i;
+    const uint16_t *srcY = (const uint16_t*)_srcY;
+
+    for (i = 0; i < width; i++)
+        dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
+#undef input_pixel
+}
+
+#define YUV_NBPS(depth, BE_LE, origin) \
+static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+                                     const uint8_t *srcU, const uint8_t *srcV, \
+                                     int width, uint32_t *unused) \
+{ \
+    yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
+} \
+static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
+                                    int width, uint32_t *unused) \
+{ \
+    yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
+}
+
+YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
+YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
+YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
+YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
 
 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
                        int width, uint32_t *unused)

From dbd3183935e252aaf5796638d4711cff27c75934 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 15:12:32 -0400
Subject: [PATCH 701/830] swscale: change 48bit RGB input macros to inline
 functions.

Inline functions are slightly larger in source code, but
are easier to handle in source code editors. The binary code
generated is the same.
---
 libswscale/swscale.c | 129 ++++++++++++++++++++++++++-----------------
 1 file changed, 78 insertions(+), 51 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index b505c0cec0..be42bcfbef 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1008,58 +1008,85 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride,
     }
 }
 
-#define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
-static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
-                       uint8_t *dst, const uint8_t *src, int width, \
-                       uint32_t *unused) \
-{ \
-    int i; \
-    for (i = 0; i < width; i++) { \
-        int compA = rfunc(&src[i*6+0]) >> 8; \
-        int compB = rfunc(&src[i*6+2]) >> 8; \
-        int compC = rfunc(&src[i*6+4]) >> 8; \
- \
-        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
-    } \
-} \
- \
-static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
-                        uint8_t *dstU, uint8_t *dstV, \
-                        const uint8_t *src1, const uint8_t *src2, \
-                        int width, uint32_t *unused) \
-{ \
-    int i; \
-    assert(src1==src2); \
-    for (i = 0; i < width; i++) { \
-        int compA = rfunc(&src1[6*i + 0]) >> 8; \
-        int compB = rfunc(&src1[6*i + 2]) >> 8; \
-        int compC = rfunc(&src1[6*i + 4]) >> 8; \
- \
-        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
-        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
-    } \
-} \
- \
-static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
-                            uint8_t *dstU, uint8_t *dstV, \
-                            const uint8_t *src1, const uint8_t *src2, \
-                            int width, uint32_t *unused) \
-{ \
-    int i; \
-    assert(src1==src2); \
-    for (i = 0; i < width; i++) { \
-        int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
-        int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
-        int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
- \
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
-    } \
+static av_always_inline void
+rgb48ToY_c_template(uint8_t *dst, const uint8_t *src, int width,
+                    enum PixelFormat origin)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
+        int a = input_pixel(&src[i*6+0]) >> 8;
+        int g = input_pixel(&src[i*6+2]) >> 8;
+        int c = input_pixel(&src[i*6+4]) >> 8;
+
+#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? c : a)
+#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? a : c)
+        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+    }
 }
-rgb48funcs(LE, AV_RL16, r, g, b);
-rgb48funcs(BE, AV_RB16, r, g, b);
-rgb48funcs(LE, AV_RL16, b, g, r);
-rgb48funcs(BE, AV_RB16, b, g, r);
+
+static av_always_inline void
+rgb48ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
+                    const uint8_t *src1, const uint8_t *src2,
+                    int width, enum PixelFormat origin)
+{
+    int i;
+    assert(src1==src2);
+    for (i = 0; i < width; i++) {
+        int a = input_pixel(&src1[6*i + 0]) >> 8;
+        int g = input_pixel(&src1[6*i + 2]) >> 8;
+        int c = input_pixel(&src1[6*i + 4]) >> 8;
+
+        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+    }
+}
+
+static av_always_inline void
+rgb48ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
+                          const uint8_t *src1, const uint8_t *src2,
+                          int width, enum PixelFormat origin)
+{
+    int i;
+    assert(src1==src2);
+    for (i = 0; i < width; i++) {
+        int a = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8);
+        int g = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8);
+        int c = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8);
+
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+    }
+#undef r
+#undef b
+#undef input_pixel
+}
+
+#define rgb48funcs(pattern, BE_LE, origin) \
+static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
+                                    int width, uint32_t *unused) \
+{ \
+    rgb48ToY_c_template(dst, src, width, origin); \
+} \
+ \
+static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+                                    const uint8_t *src1, const uint8_t *src2, \
+                                    int width, uint32_t *unused) \
+{ \
+    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
+} \
+ \
+static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
+                                    const uint8_t *src1, const uint8_t *src2, \
+                                    int width, uint32_t *unused) \
+{ \
+    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
+}
+
+rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
+rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
+rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
+rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
 static void name ## _c(uint8_t *dst, const uint8_t *src, \

From ed8a50068caacf722b6c7a81f77abcadc134e544 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Thu, 9 Jun 2011 09:50:38 -0700
Subject: [PATCH 702/830] riff: Fix potential memleak.

Make ff_get_wav_header() free existing extradata before allocating a new
buffer.
---
 libavformat/riff.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/riff.c b/libavformat/riff.c
index e17980a00a..fe6cc55055 100644
--- a/libavformat/riff.c
+++ b/libavformat/riff.c
@@ -509,6 +509,7 @@ int ff_get_wav_header(AVIOContext *pb, AVCodecContext *codec, int size)
         }
         codec->extradata_size = cbSize;
         if (cbSize > 0) {
+            av_free(codec->extradata);
             codec->extradata = av_mallocz(codec->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
             if (!codec->extradata)
                 return AVERROR(ENOMEM);

From 9a1b79128c25a5c1004bbd0af85a68c9a2a3e580 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Thu, 9 Jun 2011 09:52:47 -0700
Subject: [PATCH 703/830] ffmpeg.c: Add a necessary const qualifier

---
 ffmpeg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 5099fc29ea..cbcdba8c64 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -573,7 +573,7 @@ static void update_sample_fmt(AVCodecContext *dec, AVCodec *dec_codec,
     if (dec_codec && dec_codec->sample_fmts &&
         dec_codec->sample_fmts[0] != AV_SAMPLE_FMT_NONE &&
         dec_codec->sample_fmts[1] != AV_SAMPLE_FMT_NONE) {
-        enum AVSampleFormat *p;
+        const enum AVSampleFormat *p;
         int min_dec = -1, min_inc = -1;
 
         /* find a matching sample format in the encoder */

From 85e9e3a9fa9c79bb3b4af74d15c7aa62f29515ce Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Mon, 6 Jun 2011 10:59:46 -0700
Subject: [PATCH 704/830] movdec: Add support for the 'wfex' atom.

The 'wfex' atom just contains a Microsoft WaveFormatEx struct.
---
 libavformat/mov.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 245933da3a..4d3f4f69d2 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -508,6 +508,19 @@ static int mov_read_dac3(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return 0;
 }
 
+static int mov_read_wfex(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVStream *st;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+
+    ff_get_wav_header(pb, st->codec, atom.size);
+
+    return 0;
+}
+
 static int mov_read_pasp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     const int num = avio_rb32(pb);
@@ -2247,6 +2260,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('e','s','d','s'), mov_read_esds },
 { MKTAG('d','a','c','3'), mov_read_dac3 }, /* AC-3 info */
 { MKTAG('w','i','d','e'), mov_read_wide }, /* place holder */
+{ MKTAG('w','f','e','x'), mov_read_wfex },
 { MKTAG('c','m','o','v'), mov_read_cmov },
 { 0, NULL }
 };

From e2babb9b47072ca586b4414680b8d7abf85a16e5 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 9 Jun 2011 10:00:09 -0400
Subject: [PATCH 705/830] swscale: rearrange code.

Move functions so that related code sits together and the file is less
scattered all over the place. This only moves code; the function bodies
are unchanged.
---
 libswscale/swscale.c | 166 +++++++++++++++++++++----------------------
 1 file changed, 83 insertions(+), 83 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index be42bcfbef..68508a9741 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -329,6 +329,33 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
         }
 }
 
+static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
+                       const int16_t *chrUSrc, const int16_t *chrVSrc,
+                       const int16_t *alpSrc,
+                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                       uint8_t *aDest, int dstW, int chrDstW)
+{
+    int i;
+    for (i=0; i<dstW; i++) {
+        int val= (lumSrc[i]+64)>>7;
+        dest[i]= av_clip_uint8(val);
+    }
+
+    if (uDest)
+        for (i=0; i<chrDstW; i++) {
+            int u=(chrUSrc[i]+64)>>7;
+            int v=(chrVSrc[i]+64)>>7;
+            uDest[i]= av_clip_uint8(u);
+            vDest[i]= av_clip_uint8(v);
+        }
+
+    if (CONFIG_SWSCALE_ALPHA && aDest)
+        for (i=0; i<dstW; i++) {
+            int val= (alpSrc[i]+64)>>7;
+            aDest[i]= av_clip_uint8(val);
+        }
+}
+
 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                         const int16_t **lumSrc, int lumFilterSize,
                         const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -996,6 +1023,46 @@ static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
     }
 }
 
+/**
+ * vertical bilinear scale YV12 to RGB
+ */
+static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
+                          const uint16_t *buf1, const uint16_t *ubuf0,
+                          const uint16_t *ubuf1, const uint16_t *vbuf0,
+                          const uint16_t *vbuf1, const uint16_t *abuf0,
+                          const uint16_t *abuf1, uint8_t *dest, int dstW,
+                          int yalpha, int uvalpha, int y)
+{
+    int  yalpha1=4095- yalpha;
+    int uvalpha1=4095-uvalpha;
+    int i;
+
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
+}
+
+/**
+ * YV12 to RGB without scaling or interpolating
+ */
+static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
+                          const uint16_t *ubuf0, const uint16_t *ubuf1,
+                          const uint16_t *vbuf0, const uint16_t *vbuf1,
+                          const uint16_t *abuf0, uint8_t *dest, int dstW,
+                          int uvalpha, enum PixelFormat dstFormat,
+                          int flags, int y)
+{
+    const int yalpha1=0;
+    int i;
+
+    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int yalpha= 4096; //FIXME ...
+
+    if (uvalpha < 2048) {
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
+    } else {
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
+    }
+}
+
 static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                        int width, int height,
                                        int y, uint8_t val)
@@ -1111,22 +1178,6 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
 
-static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dst[i]= src[4*i];
-    }
-}
-
-static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dst[i]= src[4*i+3];
-    }
-}
-
 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
                        const uint8_t *src, const uint8_t *dummy, \
@@ -1171,6 +1222,22 @@ BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<
 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
 
+static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dst[i]= src[4*i];
+    }
+}
+
+static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dst[i]= src[4*i+3];
+    }
+}
+
 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
 {
     int i;
@@ -1217,73 +1284,6 @@ static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
     }
 }
 
-static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
-                       const int16_t *chrUSrc, const int16_t *chrVSrc,
-                       const int16_t *alpSrc,
-                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                       uint8_t *aDest, int dstW, int chrDstW)
-{
-    int i;
-    for (i=0; i<dstW; i++) {
-        int val= (lumSrc[i]+64)>>7;
-        dest[i]= av_clip_uint8(val);
-    }
-
-    if (uDest)
-        for (i=0; i<chrDstW; i++) {
-            int u=(chrUSrc[i]+64)>>7;
-            int v=(chrVSrc[i]+64)>>7;
-            uDest[i]= av_clip_uint8(u);
-            vDest[i]= av_clip_uint8(v);
-        }
-
-    if (CONFIG_SWSCALE_ALPHA && aDest)
-        for (i=0; i<dstW; i++) {
-            int val= (alpSrc[i]+64)>>7;
-            aDest[i]= av_clip_uint8(val);
-        }
-}
-
-/**
- * vertical bilinear scale YV12 to RGB
- */
-static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
-                          const uint16_t *buf1, const uint16_t *ubuf0,
-                          const uint16_t *ubuf1, const uint16_t *vbuf0,
-                          const uint16_t *vbuf1, const uint16_t *abuf0,
-                          const uint16_t *abuf1, uint8_t *dest, int dstW,
-                          int yalpha, int uvalpha, int y)
-{
-    int  yalpha1=4095- yalpha;
-    int uvalpha1=4095-uvalpha;
-    int i;
-
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
-}
-
-/**
- * YV12 to RGB without scaling or interpolating
- */
-static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
-                          const uint16_t *ubuf0, const uint16_t *ubuf1,
-                          const uint16_t *vbuf0, const uint16_t *vbuf1,
-                          const uint16_t *abuf0, uint8_t *dest, int dstW,
-                          int uvalpha, enum PixelFormat dstFormat,
-                          int flags, int y)
-{
-    const int yalpha1=0;
-    int i;
-
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    const int yalpha= 4096; //FIXME ...
-
-    if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
-    } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
-    }
-}
-
 //FIXME yuy2* can read up to 7 samples too much
 
 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,

From 6d4d483eee04d78ba021e84aec2ff75d9fc3a236 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 9 Jun 2011 13:59:21 -0400
Subject: [PATCH 706/830] swscale: de-macro'ify RGB15/16/32 input functions.

Inline functions are easier to read, maintain, modify and test,
which justifies the slightly increased source size. This patch
also adds support for non-native endianness RGB15/16 and fixes
isSupportedOutput() to no longer claim that we support writing
non-native RGB565/555/444.
---
 libswscale/swscale.c | 276 ++++++++++++++++++++++++++-----------------
 libswscale/utils.c   |  29 ++++-
 2 files changed, 192 insertions(+), 113 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 68508a9741..3659b42725 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -819,10 +819,8 @@ YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
             dest+=6;\
         }\
         break;\
-    case PIX_FMT_RGB565BE:\
-    case PIX_FMT_RGB565LE:\
-    case PIX_FMT_BGR565BE:\
-    case PIX_FMT_BGR565LE:\
+    case PIX_FMT_RGB565:\
+    case PIX_FMT_BGR565:\
         {\
             const int dr1= dither_2x2_8[y&1    ][0];\
             const int dg1= dither_2x2_4[y&1    ][0];\
@@ -836,10 +834,8 @@ YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
             }\
         }\
         break;\
-    case PIX_FMT_RGB555BE:\
-    case PIX_FMT_RGB555LE:\
-    case PIX_FMT_BGR555BE:\
-    case PIX_FMT_BGR555LE:\
+    case PIX_FMT_RGB555:\
+    case PIX_FMT_BGR555:\
         {\
             const int dr1= dither_2x2_8[y&1    ][0];\
             const int dg1= dither_2x2_8[y&1    ][1];\
@@ -853,10 +849,8 @@ YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
             }\
         }\
         break;\
-    case PIX_FMT_RGB444BE:\
-    case PIX_FMT_RGB444LE:\
-    case PIX_FMT_BGR444BE:\
-    case PIX_FMT_BGR444LE:\
+    case PIX_FMT_RGB444:\
+    case PIX_FMT_BGR444:\
         {\
             const int dr1= dither_4x4_16[y&3    ][0];\
             const int dg1= dither_4x4_16[y&3    ][1];\
@@ -1155,72 +1149,126 @@ rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
 
-#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static void name ## _c(uint8_t *dst, const uint8_t *src, \
-                       int width, uint32_t *unused)\
-{\
-    int i;\
-    for (i=0; i<width; i++) {\
-        int b= (((const type*)src)[i]>>shb)&maskb;\
-        int g= (((const type*)src)[i]>>shg)&maskg;\
-        int r= (((const type*)src)[i]>>shr)&maskr;\
-\
-        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
-    }\
+static av_always_inline void
+rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
+                       int width, enum PixelFormat origin,
+                       int shr,   int shg,   int shb, int shp,
+                       int maskr, int maskg, int maskb,
+                       int rsh,   int gsh,   int bsh, int S)
+{
+    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
+              rnd = 33 << (S - 1);
+    int i;
+
+    for (i = 0; i < width; i++) {
+#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
+                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
+                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
+        int px = input_pixel(i) >> shp;
+        int b = (px & maskb) >> shb;
+        int g = (px & maskg) >> shg;
+        int r = (px & maskr) >> shr;
+
+        dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
+    }
 }
 
-BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
-BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
-BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
-BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
-BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
+static av_always_inline void
+rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
+                        const uint8_t *src, int width,
+                        enum PixelFormat origin,
+                        int shr,   int shg,   int shb, int shp,
+                        int maskr, int maskg, int maskb,
+                        int rsh,   int gsh,   int bsh, int S)
+{
+    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
+              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
+              rnd = 257 << (S - 1);
+    int i;
 
-#define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
-static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
-                       const uint8_t *src, const uint8_t *dummy, \
-                       int width, uint32_t *unused)\
-{\
-    int i;\
-    for (i=0; i<width; i++) {\
-        int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
-        int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
-        int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
-\
-        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
-        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
-    }\
-}\
-static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
-                            const uint8_t *src, const uint8_t *dummy, \
-                            int width, uint32_t *unused)\
-{\
-    int i;\
-    for (i=0; i<width; i++) {\
-        int pix0= ((const type*)src)[2*i+0]>>shp;\
-        int pix1= ((const type*)src)[2*i+1]>>shp;\
-        int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
-        int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
-        int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
-        g&= maskg|(2*maskg);\
-\
-        g>>=shg;\
-\
-        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
-        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
-    }\
+    for (i = 0; i < width; i++) {
+        int px = input_pixel(i) >> shp;
+        int b = (px & maskb) >> shb;
+        int g = (px & maskg) >> shg;
+        int r = (px & maskr) >> shr;
+
+        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
+        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
+    }
 }
 
-BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
-BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
+static av_always_inline void
+rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src, int width,
+                             enum PixelFormat origin,
+                             int shr,   int shg,   int shb, int shp,
+                             int maskr, int maskg, int maskb,
+                             int rsh,   int gsh,   int bsh, int S)
+{
+    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
+              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
+              rnd = 257 << S, maskgx = ~(maskr | maskb);
+    int i;
+
+    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
+    for (i = 0; i < width; i++) {
+        int px0 = input_pixel(2 * i + 0) >> shp;
+        int px1 = input_pixel(2 * i + 1) >> shp;
+        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
+        int rb = px0 + px1 - g;
+
+        b = (rb & maskb) >> shb;
+        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
+            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
+            g >>= shg;
+        } else {
+            g = (g  & maskg) >> shg;
+        }
+        r = (rb & maskr) >> shr;
+
+        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
+        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
+    }
+#undef input_pixel
+}
+
+#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
+                         maskg, maskb, rsh, gsh, bsh, S) \
+static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
+                          int width, uint32_t *unused) \
+{ \
+    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
+                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
+} \
+ \
+static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+                           const uint8_t *src, const uint8_t *dummy, \
+                           int width, uint32_t *unused) \
+{ \
+    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
+                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
+} \
+ \
+static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
+                                const uint8_t *src, const uint8_t *dummy, \
+                                int width, uint32_t *unused) \
+{ \
+    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
+                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
+}
+
+rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
+rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
+rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
+rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
 
 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
@@ -1979,37 +2027,45 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
     }
     if (c->chrSrcHSubSample) {
         switch(srcFormat) {
-        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
-        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
-        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half_c;  break;
-        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
-        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
-        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
-        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
-        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half_c;  break;
-        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c; break;
-        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
-        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
+        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
+        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
+        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
+        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
+        case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c;   break;
+        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c;  break;
+        case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c;   break;
+        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
+        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
+        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
+        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
+        case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c;   break;
+        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c;  break;
+        case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c;   break;
+        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
+        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
+        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
+        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
         }
     } else {
         switch(srcFormat) {
-        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
-        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
-        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_c;  break;
-        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
-        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
-        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
-        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
-        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_c;  break;
-        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c; break;
-        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
-        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
+        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
+        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
+        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
+        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
+        case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c;   break;
+        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c;  break;
+        case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c;   break;
+        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
+        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
+        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
+        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
+        case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c;   break;
+        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c;  break;
+        case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c;   break;
+        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
+        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
+        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
+        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
         }
     }
 
@@ -2030,13 +2086,17 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
     case PIX_FMT_YUV420P16LE:
     case PIX_FMT_YUV422P16LE:
     case PIX_FMT_YUV444P16LE:
-    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
-    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
-    case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY_c; break;
-    case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY_c; break;
-    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
-    case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY_c; break;
-    case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY_c; break;
+    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c;    break;
+    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c;   break;
+    case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
+    case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
+    case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
+    case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
+    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c;   break;
+    case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
+    case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
+    case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
+    case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
     case PIX_FMT_RGB8     :
     case PIX_FMT_BGR8     :
     case PIX_FMT_PAL8     :
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 827abc66d2..d552330ec5 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -75,13 +75,17 @@ const char *swscale_license(void)
         || (x)==PIX_FMT_BGR48BE     \
         || (x)==PIX_FMT_BGR48LE     \
         || (x)==PIX_FMT_BGR24       \
-        || (x)==PIX_FMT_BGR565      \
-        || (x)==PIX_FMT_BGR555      \
+        || (x)==PIX_FMT_BGR565LE    \
+        || (x)==PIX_FMT_BGR565BE    \
+        || (x)==PIX_FMT_BGR555LE    \
+        || (x)==PIX_FMT_BGR555BE    \
         || (x)==PIX_FMT_BGR32       \
         || (x)==PIX_FMT_BGR32_1     \
         || (x)==PIX_FMT_RGB24       \
-        || (x)==PIX_FMT_RGB565      \
-        || (x)==PIX_FMT_RGB555      \
+        || (x)==PIX_FMT_RGB565LE    \
+        || (x)==PIX_FMT_RGB565BE    \
+        || (x)==PIX_FMT_RGB555LE    \
+        || (x)==PIX_FMT_RGB555BE    \
         || (x)==PIX_FMT_GRAY8       \
         || (x)==PIX_FMT_Y400A       \
         || (x)==PIX_FMT_YUV410P     \
@@ -134,7 +138,22 @@ int sws_isSupportedInput(enum PixelFormat pix_fmt)
         || (x)==PIX_FMT_YUVJ422P    \
         || (x)==PIX_FMT_YUVJ440P    \
         || (x)==PIX_FMT_YUVJ444P    \
-        || isAnyRGB(x)              \
+        || isRGBinBytes(x)          \
+        || isBGRinBytes(x)          \
+        || (x)==PIX_FMT_RGB565      \
+        || (x)==PIX_FMT_RGB555      \
+        || (x)==PIX_FMT_RGB444      \
+        || (x)==PIX_FMT_BGR565      \
+        || (x)==PIX_FMT_BGR555      \
+        || (x)==PIX_FMT_BGR444      \
+        || (x)==PIX_FMT_RGB8        \
+        || (x)==PIX_FMT_BGR8        \
+        || (x)==PIX_FMT_RGB4_BYTE   \
+        || (x)==PIX_FMT_BGR4_BYTE   \
+        || (x)==PIX_FMT_RGB4        \
+        || (x)==PIX_FMT_BGR4        \
+        || (x)==PIX_FMT_MONOBLACK   \
+        || (x)==PIX_FMT_MONOWHITE   \
         || (x)==PIX_FMT_NV12        \
         || (x)==PIX_FMT_NV21        \
         || (x)==PIX_FMT_GRAY16BE    \

From 6b105e3ee607b4d83f894ee0c18bbd1d6f1e996f Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 9 Jun 2011 16:25:21 -0400
Subject: [PATCH 707/830] swscale: extract monowhite/black output from
 yuv2packed[12X]_c().

This is part of the Great Evil Plan to simplify swscale.
---
 libswscale/swscale.c | 172 ++++++++++++++++++++++++++++---------------
 1 file changed, 113 insertions(+), 59 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 3659b42725..39aac0c67b 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -528,6 +528,104 @@ static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
 YUV2PACKEDWRAPPER(yuv2gray16, LE, PIX_FMT_GRAY16LE);
 YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
 
+static av_always_inline void
+yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                      const int16_t **lumSrc, int lumFilterSize,
+                      const int16_t *chrFilter, const int16_t **chrUSrc,
+                      const int16_t **chrVSrc, int chrFilterSize,
+                      const int16_t **alpSrc, uint8_t *dest, int dstW,
+                      int y, enum PixelFormat target)
+{
+    const uint8_t * const d128=dither_8x8_220[y&7];
+    uint8_t *g = c->table_gU[128] + c->table_gV[128];
+    int i;
+    int acc = 0;
+
+#define output_pixel(pos, acc) \
+    if (target == PIX_FMT_MONOBLACK) { \
+        pos = acc; \
+    } else { \
+        pos = ~acc; \
+    }
+    for (i = 0; i < dstW - 1; i += 2) {
+        int j;
+        int Y1 = 1 << 18;
+        int Y2 = 1 << 18;
+
+        for (j = 0; j < lumFilterSize; j++) {
+            Y1 += lumSrc[j][i]   * lumFilter[j];
+            Y2 += lumSrc[j][i+1] * lumFilter[j];
+        }
+        Y1 >>= 19;
+        Y2 >>= 19;
+        if ((Y1 | Y2) & 0x100) {
+            Y1 = av_clip_uint8(Y1);
+            Y2 = av_clip_uint8(Y2);
+        }
+        acc += acc + g[Y1 + d128[(i + 0) & 7]];
+        acc += acc + g[Y2 + d128[(i + 1) & 7]];
+        if ((i & 7) == 6) {
+            output_pixel(*dest++, acc);
+        }
+    }
+}
+
+static av_always_inline void
+yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0,
+                      const uint16_t *buf1, const uint16_t *ubuf0,
+                      const uint16_t *ubuf1, const uint16_t *vbuf0,
+                      const uint16_t *vbuf1, const uint16_t *abuf0,
+                      const uint16_t *abuf1, uint8_t *dest, int dstW,
+                      int yalpha, int uvalpha, int y,
+                      enum PixelFormat target)
+{
+    const uint8_t * const d128 = dither_8x8_220[y & 7];
+    uint8_t *g = c->table_gU[128] + c->table_gV[128];
+    int  yalpha1 = 4095 - yalpha;
+    int i;
+
+    for (i = 0; i < dstW - 7; i += 8) {
+        int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
+        acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
+        acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
+        acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
+        acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
+        acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
+        acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
+        acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
+        output_pixel(*dest++, acc);
+    }
+}
+
+static av_always_inline void
+yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
+                      const uint16_t *ubuf0, const uint16_t *ubuf1,
+                      const uint16_t *vbuf0, const uint16_t *vbuf1,
+                      const uint16_t *abuf0, uint8_t *dest, int dstW,
+                      int uvalpha, enum PixelFormat dstFormat,
+                      int flags, int y, enum PixelFormat target)
+{
+    const uint8_t * const d128 = dither_8x8_220[y & 7];
+    uint8_t *g = c->table_gU[128] + c->table_gV[128];
+    int i;
+
+    for (i = 0; i < dstW - 7; i += 8) {
+        int acc =    g[(buf0[i    ] >> 7) + d128[0]];
+        acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
+        acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
+        acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
+        acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
+        acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
+        acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
+        acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
+        output_pixel(*dest++, acc);
+    }
+#undef output_pixel
+}
+
+YUV2PACKEDWRAPPER(yuv2mono, white, PIX_FMT_MONOWHITE);
+YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK);
+
 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) {\
         int j;\
@@ -677,51 +775,7 @@ YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
     b = (type *)c->table_bU[U];
 
-#define YSCALE_YUV_2_MONO2_C \
-    const uint8_t * const d128=dither_8x8_220[y&7];\
-    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
-    for (i=0; i<dstW-7; i+=8) {\
-        int acc;\
-        acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
-        acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
-        acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
-        acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
-        acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
-        acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
-        acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
-        acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
-        ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
-        dest++;\
-    }
-
-#define YSCALE_YUV_2_MONOX_C \
-    const uint8_t * const d128=dither_8x8_220[y&7];\
-    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
-    int acc=0;\
-    for (i=0; i<dstW-1; i+=2) {\
-        int j;\
-        int Y1=1<<18;\
-        int Y2=1<<18;\
-\
-        for (j=0; j<lumFilterSize; j++) {\
-            Y1 += lumSrc[j][i] * lumFilter[j];\
-            Y2 += lumSrc[j][i+1] * lumFilter[j];\
-        }\
-        Y1>>=19;\
-        Y2>>=19;\
-        if ((Y1|Y2)&0x100) {\
-            Y1 = av_clip_uint8(Y1); \
-            Y2 = av_clip_uint8(Y2); \
-        }\
-        acc+= acc + g[Y1+d128[(i+0)&7]];\
-        acc+= acc + g[Y2+d128[(i+1)&7]];\
-        if ((i&7)==6) {\
-            ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
-            dest++;\
-        }\
-    }
-
-#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_monoblack)\
+#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
     switch(c->dstFormat) {\
     case PIX_FMT_RGB48BE:\
     case PIX_FMT_RGB48LE:\
@@ -897,12 +951,6 @@ YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
             }\
         }\
         break;\
-    case PIX_FMT_MONOBLACK:\
-    case PIX_FMT_MONOWHITE:\
-        {\
-            func_monoblack\
-        }\
-        break;\
     case PIX_FMT_YUYV422:\
         func2\
             ((uint8_t*)dest)[2*i2+0]= Y1;\
@@ -928,7 +976,7 @@ static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                           const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_MONOX_C)
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0))
 }
 
 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
@@ -1031,7 +1079,7 @@ static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
     int uvalpha1=4095-uvalpha;
     int i;
 
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0))
 }
 
 /**
@@ -1044,16 +1092,12 @@ static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
                           int uvalpha, enum PixelFormat dstFormat,
                           int flags, int y)
 {
-    const int yalpha1=0;
     int i;
 
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    const int yalpha= 4096; //FIXME ...
-
     if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0))
     } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0))
     }
 }
 
@@ -1717,6 +1761,16 @@ find_c_packed_planar_out_funcs(SwsContext *c,
             *yuv2packed2 = yuv2gray16LE_2_c;
             *yuv2packedX = yuv2gray16LE_X_c;
             break;
+        case PIX_FMT_MONOWHITE:
+            *yuv2packed1 = yuv2monowhite_1_c;
+            *yuv2packed2 = yuv2monowhite_2_c;
+            *yuv2packedX = yuv2monowhite_X_c;
+            break;
+        case PIX_FMT_MONOBLACK:
+            *yuv2packed1 = yuv2monoblack_1_c;
+            *yuv2packed2 = yuv2monoblack_2_c;
+            *yuv2packedX = yuv2monoblack_X_c;
+            break;
         default:
             *yuv2packed1 = yuv2packed1_c;
             *yuv2packed2 = yuv2packed2_c;

From c673c90515ab553c9ed9f4e1997600d1b3e4e2bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Tue, 29 Mar 2011 20:18:45 +0200
Subject: [PATCH 708/830] oss,sndio: simplify by using FFMIN.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavdevice/oss_audio.c | 4 +---
 libavdevice/sndio_enc.c | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/libavdevice/oss_audio.c b/libavdevice/oss_audio.c
index af46ea890b..fcbe26ba93 100644
--- a/libavdevice/oss_audio.c
+++ b/libavdevice/oss_audio.c
@@ -181,9 +181,7 @@ static int audio_write_packet(AVFormatContext *s1, AVPacket *pkt)
     uint8_t *buf= pkt->data;
 
     while (size > 0) {
-        len = AUDIO_BLOCK_SIZE - s->buffer_ptr;
-        if (len > size)
-            len = size;
+        len = FFMIN(AUDIO_BLOCK_SIZE - s->buffer_ptr, size);
         memcpy(s->buffer + s->buffer_ptr, buf, len);
         s->buffer_ptr += len;
         if (s->buffer_ptr >= AUDIO_BLOCK_SIZE) {
diff --git a/libavdevice/sndio_enc.c b/libavdevice/sndio_enc.c
index 6745ba4893..49a52b355e 100644
--- a/libavdevice/sndio_enc.c
+++ b/libavdevice/sndio_enc.c
@@ -49,9 +49,7 @@ static int audio_write_packet(AVFormatContext *s1, AVPacket *pkt)
     int len, ret;
 
     while (size > 0) {
-        len = s->buffer_size - s->buffer_offset;
-        if (len > size)
-            len = size;
+        len = FFMIN(s->buffer_size - s->buffer_offset, size);
         memcpy(s->buffer + s->buffer_offset, buf, len);
         buf  += len;
         size -= len;

From 95a05030ca73076a8e5127e69596ade5964af60b Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 9 Jun 2011 22:29:40 +0200
Subject: [PATCH 709/830] intelh263dec: fix "Strict H.263 compliance" file
 playback

fixes issue2449

patch submitted by xvid_fan freenet de
---
 libavcodec/intelh263dec.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavcodec/intelh263dec.c b/libavcodec/intelh263dec.c
index c3d4d26872..b60608cd31 100644
--- a/libavcodec/intelh263dec.c
+++ b/libavcodec/intelh263dec.c
@@ -46,7 +46,7 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s)
     skip_bits1(&s->gb);         /* freeze picture release off */
 
     format = get_bits(&s->gb, 3);
-    if (format != 7) {
+    if (format == 0 || format == 6) {
         av_log(s->avctx, AV_LOG_ERROR, "Intel H263 free format not supported\n");
         return -1;
     }
@@ -64,7 +64,10 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s)
     s->obmc= get_bits1(&s->gb);
     s->pb_frame = get_bits1(&s->gb);
 
-    if(format == 7){
+    if (format < 6) {
+        s->width = h263_format[format][0];
+        s->height = h263_format[format][1];
+    } else {
         format = get_bits(&s->gb, 3);
         if(format == 0 || format == 7){
             av_log(s->avctx, AV_LOG_ERROR, "Wrong Intel H263 format\n");

From 8e84c072e4e8db33641696555ed73de61ce61442 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 9 Jun 2011 22:36:31 +0200
Subject: [PATCH 710/830] intelh263dec: aspect ratio processing fix.

patch submitted by xvid_fan freenet de
---
 libavcodec/intelh263dec.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/intelh263dec.c b/libavcodec/intelh263dec.c
index b60608cd31..83049bcc6f 100644
--- a/libavcodec/intelh263dec.c
+++ b/libavcodec/intelh263dec.c
@@ -67,6 +67,8 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s)
     if (format < 6) {
         s->width = h263_format[format][0];
         s->height = h263_format[format][1];
+        s->avctx->sample_aspect_ratio.num = 12;
+        s->avctx->sample_aspect_ratio.den = 11;
     } else {
         format = get_bits(&s->gb, 3);
         if(format == 0 || format == 7){
@@ -91,9 +93,13 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s)
         skip_bits1(&s->gb);
         skip_bits(&s->gb, 9); // display height
         if(ar == 15){
-            skip_bits(&s->gb, 8); // aspect ratio - width
-            skip_bits(&s->gb, 8); // aspect ratio - height
+            s->avctx->sample_aspect_ratio.num = get_bits(&s->gb, 8); // aspect ratio - width
+            s->avctx->sample_aspect_ratio.den = get_bits(&s->gb, 8); // aspect ratio - height
+        } else {
+            s->avctx->sample_aspect_ratio = ff_h263_pixel_aspect[ar];
         }
+        if (s->avctx->sample_aspect_ratio.num == 0)
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid aspect ratio.\n");
     }
 
     s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);

From 4654420410da643812b7b90fb39dd42e3a02bc53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Sat, 26 Feb 2011 12:52:01 +0100
Subject: [PATCH 711/830] matroskadec: set timestamps for RealAudio packets.

Improves seeking in ffplay with
http://samples.mplayerhq.hu/Matroska/RA_missing_timestamps.mkv

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/matroskadec.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 64db318869..57a8f624b8 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -128,6 +128,7 @@ typedef struct {
     int      sub_packet_size;
     int      sub_packet_cnt;
     int      pkt_cnt;
+    uint64_t buf_timecode;
     uint8_t *buf;
 } MatroskaTrackAudio;
 
@@ -1746,6 +1747,8 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
                 int x;
 
                 if (!track->audio.pkt_cnt) {
+                    if (track->audio.sub_packet_cnt == 0)
+                        track->audio.buf_timecode = timecode;
                     if (st->codec->codec_id == CODEC_ID_RA_288)
                         for (x=0; x<h/2; x++)
                             memcpy(track->audio.buf+x*2*w+y*cfs,
@@ -1768,6 +1771,8 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
                     av_new_packet(pkt, a);
                     memcpy(pkt->data, track->audio.buf
                            + a * (h*w / a - track->audio.pkt_cnt--), a);
+                    pkt->pts = track->audio.buf_timecode;
+                    track->audio.buf_timecode = AV_NOPTS_VALUE;
                     pkt->pos = pos;
                     pkt->stream_index = st->index;
                     dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
@@ -1911,6 +1916,9 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
 
     index_min = index;
     for (i=0; i < matroska->tracks.nb_elem; i++) {
+        tracks[i].audio.pkt_cnt = 0;
+        tracks[i].audio.sub_packet_cnt = 0;
+        tracks[i].audio.buf_timecode = AV_NOPTS_VALUE;
         tracks[i].end_timecode = 0;
         if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE
             && !tracks[i].stream->discard != AVDISCARD_ALL) {

From 996bbdbf1f5a031d38e8af3eac432bf8e2c72458 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 15 Mar 2011 12:34:55 +0100
Subject: [PATCH 712/830] lavf: make compute_pkt_fields2() return meaningful
 error values

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/utils.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index c198af07ed..b189bfb622 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2867,7 +2867,7 @@ static int compute_pkt_fields2(AVFormatContext *s, AVStream *st, AVPacket *pkt){
             pkt->pts, pkt->dts, st->cur_dts, delay, pkt->size, pkt->stream_index);
 
 /*    if(pkt->pts == AV_NOPTS_VALUE && pkt->dts == AV_NOPTS_VALUE)
-        return -1;*/
+        return AVERROR(EINVAL);*/
 
     /* duration field */
     if (pkt->duration == 0) {
@@ -2902,11 +2902,11 @@ static int compute_pkt_fields2(AVFormatContext *s, AVStream *st, AVPacket *pkt){
         av_log(s, AV_LOG_ERROR,
                "Application provided invalid, non monotonically increasing dts to muxer in stream %d: %"PRId64" >= %"PRId64"\n",
                st->index, st->cur_dts, pkt->dts);
-        return -1;
+        return AVERROR(EINVAL);
     }
     if(pkt->dts != AV_NOPTS_VALUE && pkt->pts != AV_NOPTS_VALUE && pkt->pts < pkt->dts){
         av_log(s, AV_LOG_ERROR, "pts < dts in stream %d\n", st->index);
-        return -1;
+        return AVERROR(EINVAL);
     }
 
 //    av_log(s, AV_LOG_DEBUG, "av_write_frame: pts2:%"PRId64" dts2:%"PRId64"\n", pkt->pts, pkt->dts);

From 6a137dde3ed6d88dc8bfa9de6f29a23e2b384a77 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 10 Jun 2011 00:19:25 +0200
Subject: [PATCH 713/830] dict: This code was developed in ffmpeg and not
 libav, nor by libav developers. Correct copyright notices.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavutil/dict.c | 8 ++++----
 libavutil/dict.h | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavutil/dict.c b/libavutil/dict.c
index 56f1513d32..332eccd679 100644
--- a/libavutil/dict.c
+++ b/libavutil/dict.c
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2009 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavutil/dict.h b/libavutil/dict.h
index bfd7f2682c..19cc0915d8 100644
--- a/libavutil/dict.h
+++ b/libavutil/dict.h
@@ -1,19 +1,19 @@
 /*
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 

From af2faf2076b96ab85cc51d5e970574079373c396 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 9 Jun 2011 18:13:53 -0400
Subject: [PATCH 714/830] swscale: split YUYV output out of
 yuv2packed[12X]_c().

This is part of the Great Evil Plan to simplify swscale.
---
 libswscale/swscale.c | 181 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 137 insertions(+), 44 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 39aac0c67b..bc076dd3f2 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -626,7 +626,117 @@ yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
 YUV2PACKEDWRAPPER(yuv2mono, white, PIX_FMT_MONOWHITE);
 YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK);
 
-#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
+static av_always_inline void
+yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                     const int16_t **lumSrc, int lumFilterSize,
+                     const int16_t *chrFilter, const int16_t **chrUSrc,
+                     const int16_t **chrVSrc, int chrFilterSize,
+                     const int16_t **alpSrc, uint8_t *dest, int dstW,
+                     int y, enum PixelFormat target)
+{
+    int i;
+
+#define output_pixels(pos, Y1, U, Y2, V) \
+    if (target == PIX_FMT_YUYV422) { \
+        dest[pos + 0] = Y1; \
+        dest[pos + 1] = U;  \
+        dest[pos + 2] = Y2; \
+        dest[pos + 3] = V;  \
+    } else { \
+        dest[pos + 0] = U;  \
+        dest[pos + 1] = Y1; \
+        dest[pos + 2] = V;  \
+        dest[pos + 3] = Y2; \
+    }
+
+    for (i = 0; i < (dstW >> 1); i++) {
+        int j;
+        int Y1 = 1 << 18;
+        int Y2 = 1 << 18;
+        int U  = 1 << 18;
+        int V  = 1 << 18;
+
+        for (j = 0; j < lumFilterSize; j++) {
+            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
+            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
+        }
+        for (j = 0; j < chrFilterSize; j++) {
+            U += chrUSrc[j][i] * chrFilter[j];
+            V += chrVSrc[j][i] * chrFilter[j];
+        }
+        Y1 >>= 19;
+        Y2 >>= 19;
+        U  >>= 19;
+        V  >>= 19;
+        if ((Y1 | Y2 | U | V) & 0x100) {
+            Y1 = av_clip_uint8(Y1);
+            Y2 = av_clip_uint8(Y2);
+            U  = av_clip_uint8(U);
+            V  = av_clip_uint8(V);
+        }
+        output_pixels(4*i, Y1, U, Y2, V);
+    }
+}
+
+static av_always_inline void
+yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0,
+                     const uint16_t *buf1, const uint16_t *ubuf0,
+                     const uint16_t *ubuf1, const uint16_t *vbuf0,
+                     const uint16_t *vbuf1, const uint16_t *abuf0,
+                     const uint16_t *abuf1, uint8_t *dest, int dstW,
+                     int yalpha, int uvalpha, int y,
+                     enum PixelFormat target)
+{
+    int  yalpha1 = 4095 - yalpha;
+    int uvalpha1 = 4095 - uvalpha;
+    int i;
+
+    for (i = 0; i < (dstW >> 1); i++) {
+        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
+        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
+        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
+        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
+
+        output_pixels(i * 4, Y1, U, Y2, V);
+    }
+}
+
+static av_always_inline void
+yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0,
+                     const uint16_t *ubuf0, const uint16_t *ubuf1,
+                     const uint16_t *vbuf0, const uint16_t *vbuf1,
+                     const uint16_t *abuf0, uint8_t *dest, int dstW,
+                     int uvalpha, enum PixelFormat dstFormat,
+                     int flags, int y, enum PixelFormat target)
+{
+    int i;
+
+    if (uvalpha < 2048) {
+        for (i = 0; i < (dstW >> 1); i++) {
+            int Y1 = buf0[i * 2]     >> 7;
+            int Y2 = buf0[i * 2 + 1] >> 7;
+            int U  = ubuf1[i]        >> 7;
+            int V  = vbuf1[i]        >> 7;
+
+            output_pixels(i * 4, Y1, U, Y2, V);
+        }
+    } else {
+        for (i = 0; i < (dstW >> 1); i++) {
+            int Y1 =  buf0[i * 2]          >> 7;
+            int Y2 =  buf0[i * 2 + 1]      >> 7;
+            int U  = (ubuf0[i] + ubuf1[i]) >> 8;
+            int V  = (vbuf0[i] + vbuf1[i]) >> 8;
+
+            output_pixels(i * 4, Y1, U, Y2, V);
+        }
+    }
+#undef output_pixels
+}
+
+YUV2PACKEDWRAPPER(yuv2422, yuyv, PIX_FMT_YUYV422);
+YUV2PACKEDWRAPPER(yuv2422, uyvy, PIX_FMT_UYVY422);
+
+#define YSCALE_YUV_2_RGBX_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) {\
         int j;\
         int Y1 = 1<<18;\
@@ -668,7 +778,11 @@ YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK);
                 A1 = av_clip_uint8(A1); \
                 A2 = av_clip_uint8(A2); \
             }\
-        }
+        }\
+        /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
+    r = (type *)c->table_rV[V];   \
+    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
+    b = (type *)c->table_bU[U];
 
 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
     for (i=0; i<dstW; i++) {\
@@ -709,13 +823,7 @@ YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK);
             B = av_clip_uintp2(B, 30); \
         }
 
-#define YSCALE_YUV_2_RGBX_C(type,alpha) \
-    YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
-    r = (type *)c->table_rV[V];   \
-    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
-    b = (type *)c->table_bU[U];
-
-#define YSCALE_YUV_2_PACKED2_C(type,alpha)   \
+#define YSCALE_YUV_2_RGB2_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) { \
         const int i2= 2*i;       \
         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
@@ -727,15 +835,12 @@ YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK);
         if (alpha) {\
             A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
             A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
-        }
-
-#define YSCALE_YUV_2_RGB2_C(type,alpha) \
-    YSCALE_YUV_2_PACKED2_C(type,alpha)\
+        }\
     r = (type *)c->table_rV[V];\
     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
     b = (type *)c->table_bU[U];
 
-#define YSCALE_YUV_2_PACKED1_C(type,alpha) \
+#define YSCALE_YUV_2_RGB1_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) {\
         const int i2= 2*i;\
         int Y1= buf0[i2  ]>>7;\
@@ -747,15 +852,12 @@ YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK);
         if (alpha) {\
             A1= abuf0[i2  ]>>7;\
             A2= abuf0[i2+1]>>7;\
-        }
-
-#define YSCALE_YUV_2_RGB1_C(type,alpha) \
-    YSCALE_YUV_2_PACKED1_C(type,alpha)\
+        }\
     r = (type *)c->table_rV[V];\
     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
     b = (type *)c->table_bU[U];
 
-#define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
+#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) {\
         const int i2= 2*i;\
         int Y1= buf0[i2  ]>>7;\
@@ -767,15 +869,12 @@ YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK);
         if (alpha) {\
             A1= abuf0[i2  ]>>7;\
             A2= abuf0[i2+1]>>7;\
-        }
-
-#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
-    YSCALE_YUV_2_PACKED1B_C(type,alpha)\
+        }\
     r = (type *)c->table_rV[V];\
     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
     b = (type *)c->table_bU[U];
 
-#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
+#define YSCALE_YUV_2_ANYRGB_C(func)\
     switch(c->dstFormat) {\
     case PIX_FMT_RGB48BE:\
     case PIX_FMT_RGB48LE:\
@@ -951,22 +1050,6 @@ YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK);
             }\
         }\
         break;\
-    case PIX_FMT_YUYV422:\
-        func2\
-            ((uint8_t*)dest)[2*i2+0]= Y1;\
-            ((uint8_t*)dest)[2*i2+1]= U;\
-            ((uint8_t*)dest)[2*i2+2]= Y2;\
-            ((uint8_t*)dest)[2*i2+3]= V;\
-        }                \
-        break;\
-    case PIX_FMT_UYVY422:\
-        func2\
-            ((uint8_t*)dest)[2*i2+0]= U;\
-            ((uint8_t*)dest)[2*i2+1]= Y1;\
-            ((uint8_t*)dest)[2*i2+2]= V;\
-            ((uint8_t*)dest)[2*i2+3]= Y2;\
-        }                \
-        break;\
     }
 
 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
@@ -976,7 +1059,7 @@ static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                           const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0))
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C)
 }
 
 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
@@ -1079,7 +1162,7 @@ static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
     int uvalpha1=4095-uvalpha;
     int i;
 
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0))
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C)
 }
 
 /**
@@ -1095,9 +1178,9 @@ static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
     int i;
 
     if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0))
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C)
     } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0))
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C)
     }
 }
 
@@ -1771,6 +1854,16 @@ find_c_packed_planar_out_funcs(SwsContext *c,
             *yuv2packed2 = yuv2monoblack_2_c;
             *yuv2packedX = yuv2monoblack_X_c;
             break;
+        case PIX_FMT_YUYV422:
+            *yuv2packed1 = yuv2422yuyv_1_c;
+            *yuv2packed2 = yuv2422yuyv_2_c;
+            *yuv2packedX = yuv2422yuyv_X_c;
+            break;
+        case PIX_FMT_UYVY422:
+            *yuv2packed1 = yuv2422uyvy_1_c;
+            *yuv2packed2 = yuv2422uyvy_2_c;
+            *yuv2packedX = yuv2422uyvy_X_c;
+            break;
         default:
             *yuv2packed1 = yuv2packed1_c;
             *yuv2packed2 = yuv2packed2_c;

From 7b8ec38d28cc99c4017471c6981fc1206732b468 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 10 Jun 2011 04:54:30 +0200
Subject: [PATCH 715/830] ac3: there was no libav in 2010 thus this code cannot
 be from libav.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/ac3enc_opts_template.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/ac3enc_opts_template.c b/libavcodec/ac3enc_opts_template.c
index 699c1b5982..bf2eb7ee76 100644
--- a/libavcodec/ac3enc_opts_template.c
+++ b/libavcodec/ac3enc_opts_template.c
@@ -2,20 +2,20 @@
  * AC-3 encoder options
  * Copyright (c) 2010 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 

From d8999306e516663de2b46aed0c79f7bcc77eb5a1 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 25 Mar 2011 01:13:08 +0100
Subject: [PATCH 716/830] mpeg12: more advanced ffmpeg mpeg2 aspect guessing
 code.

Fixes issue1613, 621, 562 simultaneously

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/mpeg12.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 0e1536f7ab..03c95c191d 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -1293,9 +1293,17 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
             avctx->ticks_per_frame=2;
         //MPEG-2 aspect
             if(s->aspect_ratio_info > 1){
-                //we ignore the spec here as reality does not match the spec, see for example
+                AVRational dar =
+                    av_mul_q(
+                        av_div_q(ff_mpeg2_aspect[s->aspect_ratio_info],
+                                 (AVRational){s1->pan_scan.width, s1->pan_scan.height}),
+                        (AVRational){s->width, s->height});
+
+                // we ignore the spec here and guess a bit as reality does not match the spec, see for example
                 // res_change_ffmpeg_aspect.ts and sequence-display-aspect.mpg
-                if( (s1->pan_scan.width == 0 )||(s1->pan_scan.height == 0) || 1){
+                // issue1613, 621, 562
+                if((s1->pan_scan.width == 0 ) || (s1->pan_scan.height == 0) ||
+                   (av_cmp_q(dar,(AVRational){4,3}) && av_cmp_q(dar,(AVRational){16,9}))) {
                     s->avctx->sample_aspect_ratio=
                         av_div_q(
                          ff_mpeg2_aspect[s->aspect_ratio_info],
@@ -1307,6 +1315,12 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
                          ff_mpeg2_aspect[s->aspect_ratio_info],
                          (AVRational){s1->pan_scan.width, s1->pan_scan.height}
                         );
+//issue1613 4/3 16/9 -> 16/9
+//res_change_ffmpeg_aspect.ts 4/3 225/44 ->4/3
+//widescreen-issue562.mpg 4/3 16/9 -> 16/9
+//                    s->avctx->sample_aspect_ratio= av_mul_q(s->avctx->sample_aspect_ratio, (AVRational){s->width, s->height});
+//av_log(NULL, AV_LOG_ERROR, "A %d/%d\n",ff_mpeg2_aspect[s->aspect_ratio_info].num, ff_mpeg2_aspect[s->aspect_ratio_info].den);
+//av_log(NULL, AV_LOG_ERROR, "B %d/%d\n",s->avctx->sample_aspect_ratio.num, s->avctx->sample_aspect_ratio.den);
                 }
             }else{
                 s->avctx->sample_aspect_ratio=

From f33a6a22b4f2382a5113194335ae2a22ae957fed Mon Sep 17 00:00:00 2001
From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
Date: Fri, 13 May 2011 23:46:48 +0900
Subject: [PATCH 717/830] mov: Fix wrong timestamp generation for fragmented
 movies that have time offset caused by the first edit list entry.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/mov.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 4d3f4f69d2..ab5c4e2db9 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1532,8 +1532,9 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
 
     /* adjust first dts according to edit list */
     if (sc->time_offset && mov->time_scale > 0) {
-        int rescaled = sc->time_offset < 0 ? av_rescale(sc->time_offset, sc->time_scale, mov->time_scale) : sc->time_offset;
-        current_dts = -rescaled;
+        if (sc->time_offset < 0)
+            sc->time_offset = av_rescale(sc->time_offset, sc->time_scale, mov->time_scale);
+        current_dts = -sc->time_offset;
         if (sc->ctts_data && sc->stts_data &&
             sc->ctts_data[0].duration / sc->stts_data[0].duration > 16) {
             /* more than 16 frames delay, dts are likely wrong
@@ -2063,7 +2064,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     if (flags & 0x001) data_offset        = avio_rb32(pb);
     if (flags & 0x004) first_sample_flags = avio_rb32(pb);
-    dts = st->duration;
+    dts = st->duration - sc->time_offset;
     offset = frag->base_data_offset + data_offset;
     distance = 0;
     av_dlog(c->fc, "first sample flags 0x%x\n", first_sample_flags);
@@ -2092,7 +2093,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         offset += sample_size;
     }
     frag->moof_offset = offset;
-    st->duration = dts;
+    st->duration = dts + sc->time_offset;
     return 0;
 }
 

From 0558e266a267b5d90d3be1d8d86e60db2c303773 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 18 May 2011 15:41:06 +0300
Subject: [PATCH 718/830] sdp: Allow passing an AVFormatContext to the SDP
 generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Options from the AVFormatContext can be read for modifying
the generated SDP.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/internal.h |  4 +++-
 libavformat/movenc.c   |  2 +-
 libavformat/sdp.c      | 11 ++++++-----
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/libavformat/internal.h b/libavformat/internal.h
index ad3d9c91c2..7413b0906a 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -124,10 +124,12 @@ int ff_url_join(char *str, int size, const char *proto,
  * @param dest_type the destination address type, may be NULL
  * @param port the destination port of the media stream, 0 if unknown
  * @param ttl the time to live of the stream, 0 if not multicast
+ * @param fmt the AVFormatContext, which might contain options modifying
+ *            the generated SDP
  */
 void ff_sdp_write_media(char *buff, int size, AVCodecContext *c,
                         const char *dest_addr, const char *dest_type,
-                        int port, int ttl);
+                        int port, int ttl, AVFormatContext *fmt);
 
 /**
  * Write a packet to another muxer than the one the user originally
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 5f4eea49fc..b313510b25 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1359,7 +1359,7 @@ static int mov_write_udta_sdp(AVIOContext *pb, AVCodecContext *ctx, int index)
     char buf[1000] = "";
     int len;
 
-    ff_sdp_write_media(buf, sizeof(buf), ctx, NULL, NULL, 0, 0);
+    ff_sdp_write_media(buf, sizeof(buf), ctx, NULL, NULL, 0, 0, NULL);
     av_strlcatf(buf, sizeof(buf), "a=control:streamid=%d\r\n", index);
     len = strlen(buf);
 
diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index f72e2c567f..b996bf65f5 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -300,7 +300,7 @@ xiph_fail:
     return NULL;
 }
 
-static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, int payload_type)
+static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, int payload_type, AVFormatContext *fmt)
 {
     char *config = NULL;
 
@@ -449,7 +449,7 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c,
     return buff;
 }
 
-void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl)
+void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl, AVFormatContext *fmt)
 {
     const char *type;
     int payload_type;
@@ -472,7 +472,7 @@ void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *des
         av_strlcatf(buff, size, "b=AS:%d\r\n", c->bit_rate / 1000);
     }
 
-    sdp_write_media_attributes(buff, size, c, payload_type);
+    sdp_write_media_attributes(buff, size, c, payload_type, fmt);
 }
 
 int av_sdp_create(AVFormatContext *ac[], int n_files, char *buf, int size)
@@ -521,7 +521,8 @@ int av_sdp_create(AVFormatContext *ac[], int n_files, char *buf, int size)
         for (j = 0; j < ac[i]->nb_streams; j++) {
             ff_sdp_write_media(buf, size,
                                   ac[i]->streams[j]->codec, dst[0] ? dst : NULL,
-                                  dst_type, (port > 0) ? port + j * 2 : 0, ttl);
+                                  dst_type, (port > 0) ? port + j * 2 : 0, ttl,
+                                  ac[i]);
             if (port <= 0) {
                 av_strlcatf(buf, size,
                                    "a=control:streamid=%d\r\n", i + j);
@@ -537,7 +538,7 @@ int av_sdp_create(AVFormatContext *ac[], int n_files, char *buf, int size)
     return AVERROR(ENOSYS);
 }
 
-void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl)
+void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl, AVFormatContext *fmt)
 {
 }
 #endif

From cb7c11cc9e7e05c819fff487a3f486f11ab4b860 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Fri, 27 May 2011 22:21:40 +0300
Subject: [PATCH 719/830] avoptions: Add an av_opt_flag_is_set function for
 inspecting flag fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 doc/APIchanges     |  3 +++
 libavutil/avutil.h |  2 +-
 libavutil/opt.c    | 10 ++++++++++
 libavutil/opt.h    | 10 ++++++++++
 4 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index b57868dfdf..0ab658d89c 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-xx - xxxxxxx - lavu 51.6.0 - opt.h
+  Add av_opt_flag_is_set().
+
 2011-06-xx - xxxxxxx - lavu 51.5.0 - AVMetadata
   Move AVMetadata from lavf to lavu and rename it to
   AVDictionary -- new installed header dict.h.
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 9c660f3a99..0299bdf797 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,7 +40,7 @@
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR  5
+#define LIBAVUTIL_VERSION_MINOR  6
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/opt.c b/libavutil/opt.c
index 172fcec456..7775bb2af3 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -320,6 +320,16 @@ int64_t av_get_int(void *obj, const char *name, const AVOption **o_out)
     return num*intnum/den;
 }
 
+int av_opt_flag_is_set(void *obj, const char *field_name, const char *flag_name)
+{
+    const AVOption *field = av_find_opt(obj, field_name, NULL, 0, 0);
+    const AVOption *flag  = av_find_opt(obj, flag_name,  NULL, 0, 0);
+
+    if (!field || !flag || flag->type != FF_OPT_TYPE_CONST)
+        return 0;
+    return av_get_int(obj, field_name, NULL) & (int) flag->default_val.dbl;
+}
+
 static void opt_list(void *obj, void *av_log_obj, const char *unit,
                      int req_flags, int rej_flags)
 {
diff --git a/libavutil/opt.h b/libavutil/opt.h
index 8c3b6c1c36..46ad8acce1 100644
--- a/libavutil/opt.h
+++ b/libavutil/opt.h
@@ -181,4 +181,14 @@ int av_set_options_string(void *ctx, const char *opts,
  */
 void av_opt_free(void *obj);
 
+/**
+ * Check whether a particular flag is set in a flags field.
+ *
+ * @param field_name the name of the flag field option
+ * @param flag_name the name of the flag to check
+ * @return non-zero if the flag is set, zero if the flag isn't set,
+ *         isn't of the right type, or the flags field doesn't exist.
+ */
+int av_opt_flag_is_set(void *obj, const char *field_name, const char *flag_name);
+
 #endif /* AVUTIL_OPT_H */

From 0832122880fa50e66dfd62eb6aa5c814f83f68d9 Mon Sep 17 00:00:00 2001
From: Juan Carlos Rodriguez <ing.juancarlosrodriguez@hotmail.com>
Date: Wed, 18 May 2011 15:00:03 +0300
Subject: [PATCH 720/830] rtpenc: MP4A-LATM payload support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is enabled with an AVOption on the RTP muxer. The SDP
generator looks for a latm flag in the rtpflags field.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavcodec/Makefile       |  2 +-
 libavformat/Makefile      |  1 +
 libavformat/rtpenc.c      | 20 +++++++++-
 libavformat/rtpenc.h      |  6 +++
 libavformat/rtpenc_latm.c | 61 ++++++++++++++++++++++++++++++
 libavformat/sdp.c         | 78 +++++++++++++++++++++++++++++++++++++++
 libavformat/version.h     |  2 +-
 7 files changed, 167 insertions(+), 3 deletions(-)
 create mode 100644 libavformat/rtpenc_latm.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 7fd6b49d32..581d6bf399 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -545,7 +545,7 @@ OBJS-$(CONFIG_NUT_MUXER)               += mpegaudiodata.o
 OBJS-$(CONFIG_OGG_DEMUXER)             += flacdec.o flacdata.o flac.o \
                                           dirac.o mpeg12data.o
 OBJS-$(CONFIG_OGG_MUXER)               += xiph.o flacdec.o flacdata.o flac.o
-OBJS-$(CONFIG_RTP_MUXER)               += mpegvideo.o xiph.o
+OBJS-$(CONFIG_RTP_MUXER)               += mpeg4audio.o mpegvideo.o xiph.o
 OBJS-$(CONFIG_SPDIF_DEMUXER)           += aacadtsdec.o mpeg4audio.o
 OBJS-$(CONFIG_WEBM_MUXER)              += xiph.o mpeg4audio.o \
                                           flacdec.o flacdata.o flac.o \
diff --git a/libavformat/Makefile b/libavformat/Makefile
index ba978af7a4..c2fa8af466 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -228,6 +228,7 @@ OBJS-$(CONFIG_RSO_MUXER)                 += rsoenc.o rso.o
 OBJS-$(CONFIG_RPL_DEMUXER)               += rpl.o
 OBJS-$(CONFIG_RTP_MUXER)                 += rtp.o         \
                                             rtpenc_aac.o     \
+                                            rtpenc_latm.o    \
                                             rtpenc_amr.o     \
                                             rtpenc_h263.o    \
                                             rtpenc_mpv.o     \
diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
index 7cedff382e..c264b30a07 100644
--- a/libavformat/rtpenc.c
+++ b/libavformat/rtpenc.c
@@ -23,11 +23,25 @@
 #include "mpegts.h"
 #include "internal.h"
 #include "libavutil/random_seed.h"
+#include "libavutil/opt.h"
 
 #include "rtpenc.h"
 
 //#define DEBUG
 
+static const AVOption options[] = {
+    { "rtpflags", "RTP muxer flags", offsetof(RTPMuxContext, flags), FF_OPT_TYPE_FLAGS, {.dbl = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" },
+    { "latm", "Use MP4A-LATM packetization instead of MPEG4-GENERIC for AAC", 0, FF_OPT_TYPE_CONST, {.dbl = FF_RTP_FLAG_MP4A_LATM}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" },
+    { NULL },
+};
+
+static const AVClass rtp_muxer_class = {
+    .class_name = "RTP muxer",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 #define RTCP_SR_SIZE 28
 
 static int is_supported(enum CodecID id)
@@ -404,7 +418,10 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt)
         ff_rtp_send_mpegvideo(s1, pkt->data, size);
         break;
     case CODEC_ID_AAC:
-        ff_rtp_send_aac(s1, pkt->data, size);
+        if (s->flags & FF_RTP_FLAG_MP4A_LATM)
+            ff_rtp_send_latm(s1, pkt->data, size);
+        else
+            ff_rtp_send_aac(s1, pkt->data, size);
         break;
     case CODEC_ID_AMR_NB:
     case CODEC_ID_AMR_WB:
@@ -455,4 +472,5 @@ AVOutputFormat ff_rtp_muxer = {
     rtp_write_header,
     rtp_write_packet,
     rtp_write_trailer,
+    .priv_class = &rtp_muxer_class,
 };
diff --git a/libavformat/rtpenc.h b/libavformat/rtpenc.h
index 21c5c312a4..ac5a62242d 100644
--- a/libavformat/rtpenc.h
+++ b/libavformat/rtpenc.h
@@ -25,6 +25,7 @@
 #include "rtp.h"
 
 struct RTPMuxContext {
+    const AVClass *av_class;
     AVFormatContext *ic;
     AVStream *st;
     int payload_type;
@@ -56,15 +57,20 @@ struct RTPMuxContext {
      * (1, 2 or 4)
      */
     int nal_length_size;
+
+    int flags;
 };
 
 typedef struct RTPMuxContext RTPMuxContext;
 
+#define FF_RTP_FLAG_MP4A_LATM 1
+
 void ff_rtp_send_data(AVFormatContext *s1, const uint8_t *buf1, int len, int m);
 
 void ff_rtp_send_h264(AVFormatContext *s1, const uint8_t *buf1, int size);
 void ff_rtp_send_h263(AVFormatContext *s1, const uint8_t *buf1, int size);
 void ff_rtp_send_aac(AVFormatContext *s1, const uint8_t *buff, int size);
+void ff_rtp_send_latm(AVFormatContext *s1, const uint8_t *buff, int size);
 void ff_rtp_send_amr(AVFormatContext *s1, const uint8_t *buff, int size);
 void ff_rtp_send_mpegvideo(AVFormatContext *s1, const uint8_t *buf1, int size);
 void ff_rtp_send_xiph(AVFormatContext *s1, const uint8_t *buff, int size);
diff --git a/libavformat/rtpenc_latm.c b/libavformat/rtpenc_latm.c
new file mode 100644
index 0000000000..aa6e29117f
--- /dev/null
+++ b/libavformat/rtpenc_latm.c
@@ -0,0 +1,61 @@
+/*
+ * RTP Packetization of MPEG-4 Audio (RFC 3016)
+ * Copyright (c) 2011 Juan Carlos Rodriguez <ing.juancarlosrodriguez@hotmail.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "rtpenc.h"
+
+void ff_rtp_send_latm(AVFormatContext *s1, const uint8_t *buff, int size)
+{
+    /* MP4A-LATM
+     * The RTP payload format specification is described in RFC 3016
+     * The encoding specifications are provided in ISO/IEC 14496-3 */
+
+    RTPMuxContext *s = s1->priv_data;
+    int header_size;
+    int offset = 0;
+    int len    = 0;
+
+    /* skip ADTS header, if present */
+    if ((s1->streams[0]->codec->extradata_size) == 0) {
+        size -= 7;
+        buff += 7;
+    }
+
+    /* PayloadLengthInfo() */
+    header_size = size/0xFF + 1;
+    memset(s->buf, 0xFF, header_size - 1);
+    s->buf[header_size - 1] = size % 0xFF;
+
+    s->timestamp = s->cur_timestamp;
+
+    /* PayloadMux() */
+    while (size > 0) {
+        len   = FFMIN(size, s->max_payload_size - (!offset ? header_size : 0));
+        size -= len;
+        if (!offset) {
+            memcpy(s->buf + header_size, buff, len);
+            ff_rtp_send_data(s1, s->buf, header_size + len, !size);
+        } else {
+            ff_rtp_send_data(s1, buff + offset, len, !size);
+        }
+        offset += len;
+    }
+}
diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index b996bf65f5..92690f58a3 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -23,7 +23,9 @@
 #include "libavutil/base64.h"
 #include "libavutil/dict.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/opt.h"
 #include "libavcodec/xiph.h"
+#include "libavcodec/mpeg4audio.h"
 #include "avformat.h"
 #include "internal.h"
 #include "avc.h"
@@ -300,6 +302,71 @@ xiph_fail:
     return NULL;
 }
 
+static int latm_context2profilelevel(AVCodecContext *c)
+{
+    /* MP4A-LATM
+     * The RTP payload format specification is described in RFC 3016
+     * The encoding specifications are provided in ISO/IEC 14496-3 */
+
+    int profile_level = 0x2B;
+
+    /* TODO: AAC Profile only supports AAC LC Object Type.
+     * Different Object Types should implement different Profile Levels */
+
+    if (c->sample_rate <= 24000) {
+        if (c->channels <= 2)
+            profile_level = 0x28; // AAC Profile, Level 1
+    } else if (c->sample_rate <= 48000) {
+        if (c->channels <= 2) {
+            profile_level = 0x29; // AAC Profile, Level 2
+        } else if (c->channels <= 5) {
+            profile_level = 0x2A; // AAC Profile, Level 4
+        }
+    } else if (c->sample_rate <= 96000) {
+        if (c->channels <= 5) {
+            profile_level = 0x2B; // AAC Profile, Level 5
+        }
+    }
+
+    return profile_level;
+}
+
+static char *latm_context2config(AVCodecContext *c)
+{
+    /* MP4A-LATM
+     * The RTP payload format specification is described in RFC 3016
+     * The encoding specifications are provided in ISO/IEC 14496-3 */
+
+    uint8_t config_byte[6];
+    int rate_index;
+    char *config;
+
+    for (rate_index = 0; rate_index < 16; rate_index++)
+        if (ff_mpeg4audio_sample_rates[rate_index] == c->sample_rate)
+            break;
+    if (rate_index == 16) {
+        av_log(c, AV_LOG_ERROR, "Unsupported sample rate\n");
+        return NULL;
+    }
+
+    config_byte[0] = 0x40;
+    config_byte[1] = 0;
+    config_byte[2] = 0x20 | rate_index;
+    config_byte[3] = c->channels << 4;
+    config_byte[4] = 0x3f;
+    config_byte[5] = 0xc0;
+
+    config = av_malloc(6*2+1);
+    if (!config) {
+        av_log(c, AV_LOG_ERROR, "Cannot allocate memory for the config info.\n");
+        return NULL;
+    }
+    ff_data_to_hex(config, config_byte, 6, 1);
+    config[12] = 0;
+
+    return config;
+}
+
 static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, int payload_type, AVFormatContext *fmt)
 {
     char *config = NULL;
@@ -335,6 +402,16 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c,
                                      payload_type, config ? config : "");
             break;
         case CODEC_ID_AAC:
+            if (fmt && fmt->oformat->priv_class &&
+                av_opt_flag_is_set(fmt->priv_data, "rtpflags", "latm")) {
+                config = latm_context2config(c);
+                if (!config)
+                    return NULL;
+                av_strlcatf(buff, size, "a=rtpmap:%d MP4A-LATM/%d/%d\r\n"
+                                        "a=fmtp:%d profile-level-id=%d;cpresent=0;config=%s\r\n",
+                                         payload_type, c->sample_rate, c->channels,
+                                         payload_type, latm_context2profilelevel(c), config);
+            } else {
             if (c->extradata_size) {
                 config = extradata2config(c);
             } else {
@@ -353,6 +430,7 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c,
                                     "indexdeltalength=3%s\r\n",
                                      payload_type, c->sample_rate, c->channels,
                                      payload_type, config);
+            }
             break;
         case CODEC_ID_PCM_S16BE:
             if (payload_type >= RTP_PT_PRIVATE)
diff --git a/libavformat/version.h b/libavformat/version.h
index 0b53005a6f..ca61ab165f 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -25,7 +25,7 @@
 
 #define LIBAVFORMAT_VERSION_MAJOR 53
 #define LIBAVFORMAT_VERSION_MINOR  1
-#define LIBAVFORMAT_VERSION_MICRO  0
+#define LIBAVFORMAT_VERSION_MICRO  1
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
                                                LIBAVFORMAT_VERSION_MINOR, \

From 9c434ce8263097bd17e6a59461041f846edc2701 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 18 May 2011 15:48:20 +0300
Subject: [PATCH 721/830] sdp: Reindent after the previous commit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/sdp.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index 92690f58a3..c227c7f603 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -412,24 +412,24 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c,
                                          payload_type, c->sample_rate, c->channels,
                                          payload_type, latm_context2profilelevel(c), config);
             } else {
-            if (c->extradata_size) {
-                config = extradata2config(c);
-            } else {
-                /* FIXME: maybe we can forge config information based on the
-                 *        codec parameters...
-                 */
-                av_log(c, AV_LOG_ERROR, "AAC with no global headers is currently not supported.\n");
-                return NULL;
-            }
-            if (config == NULL) {
-                return NULL;
-            }
-            av_strlcatf(buff, size, "a=rtpmap:%d MPEG4-GENERIC/%d/%d\r\n"
-                                    "a=fmtp:%d profile-level-id=1;"
-                                    "mode=AAC-hbr;sizelength=13;indexlength=3;"
-                                    "indexdeltalength=3%s\r\n",
-                                     payload_type, c->sample_rate, c->channels,
-                                     payload_type, config);
+                if (c->extradata_size) {
+                    config = extradata2config(c);
+                } else {
+                    /* FIXME: maybe we can forge config information based on the
+                     *        codec parameters...
+                     */
+                    av_log(c, AV_LOG_ERROR, "AAC with no global headers is currently not supported.\n");
+                    return NULL;
+                }
+                if (config == NULL) {
+                    return NULL;
+                }
+                av_strlcatf(buff, size, "a=rtpmap:%d MPEG4-GENERIC/%d/%d\r\n"
+                                        "a=fmtp:%d profile-level-id=1;"
+                                        "mode=AAC-hbr;sizelength=13;indexlength=3;"
+                                        "indexdeltalength=3%s\r\n",
+                                         payload_type, c->sample_rate, c->channels,
+                                         payload_type, config);
             }
             break;
         case CODEC_ID_PCM_S16BE:

From 635fac9af10092b0d5780a7d1b422b3044f9abd9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Fri, 27 May 2011 22:38:36 +0300
Subject: [PATCH 722/830] rtpenc: Declare the rtp flags private AVOptions in
 rtpenc.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows other muxers that chain an RTP muxer to declare
the same options easily.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtpenc.c | 3 +--
 libavformat/rtpenc.h | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
index c264b30a07..3da6dfb3a9 100644
--- a/libavformat/rtpenc.c
+++ b/libavformat/rtpenc.c
@@ -30,8 +30,7 @@
 //#define DEBUG
 
 static const AVOption options[] = {
-    { "rtpflags", "RTP muxer flags", offsetof(RTPMuxContext, flags), FF_OPT_TYPE_FLAGS, {.dbl = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" },
-    { "latm", "Use MP4A-LATM packetization instead of MPEG4-GENERIC for AAC", 0, FF_OPT_TYPE_CONST, {.dbl = FF_RTP_FLAG_MP4A_LATM}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" },
+    FF_RTP_FLAG_OPTS(RTPMuxContext, flags),
     { NULL },
 };
 
diff --git a/libavformat/rtpenc.h b/libavformat/rtpenc.h
index ac5a62242d..3a9e19be06 100644
--- a/libavformat/rtpenc.h
+++ b/libavformat/rtpenc.h
@@ -65,6 +65,10 @@ typedef struct RTPMuxContext RTPMuxContext;
 
 #define FF_RTP_FLAG_MP4A_LATM 1
 
+#define FF_RTP_FLAG_OPTS(ctx, fieldname) \
+    { "rtpflags", "RTP muxer flags", offsetof(ctx, fieldname), FF_OPT_TYPE_FLAGS, {.dbl = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" }, \
+    { "latm", "Use MP4A-LATM packetization instead of MPEG4-GENERIC for AAC", 0, FF_OPT_TYPE_CONST, {.dbl = FF_RTP_FLAG_MP4A_LATM}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" } \
+
 void ff_rtp_send_data(AVFormatContext *s1, const uint8_t *buf1, int len, int m);
 
 void ff_rtp_send_h264(AVFormatContext *s1, const uint8_t *buf1, int size);

From ff0824f72c25787b6ad60b5c90a25473995e68c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 18 May 2011 15:42:53 +0300
Subject: [PATCH 723/830] rtpenc_chain: Pass the rtpflags options through to
 the chained muxer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtpenc_chain.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/libavformat/rtpenc_chain.c b/libavformat/rtpenc_chain.c
index 1727740f0f..87c1688dfc 100644
--- a/libavformat/rtpenc_chain.c
+++ b/libavformat/rtpenc_chain.c
@@ -23,6 +23,7 @@
 #include "avio_internal.h"
 #include "rtpenc_chain.h"
 #include "avio_internal.h"
+#include "libavutil/opt.h"
 
 AVFormatContext *ff_rtp_chain_mux_open(AVFormatContext *s, AVStream *st,
                                        URLContext *handle, int packet_size)
@@ -49,6 +50,13 @@ AVFormatContext *ff_rtp_chain_mux_open(AVFormatContext *s, AVStream *st,
     /* Copy other stream parameters. */
     rtpctx->streams[0]->sample_aspect_ratio = st->sample_aspect_ratio;
 
+    av_set_parameters(rtpctx, NULL);
+    /* Copy the rtpflags values straight through */
+    if (s->oformat->priv_class &&
+        av_find_opt(s->priv_data, "rtpflags", NULL, 0, 0))
+        av_set_int(rtpctx->priv_data, "rtpflags",
+                   av_get_int(s->priv_data, "rtpflags", NULL));
+
     /* Set the synchronized start time. */
     rtpctx->start_time_realtime = s->start_time_realtime;
 

From 6cf09bb7ef5a52b9d9b589067d94a5c80f9fe848 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sat, 21 May 2011 15:03:35 +0300
Subject: [PATCH 724/830] rtspenc: Add an AVClass for setting muxer specific
 options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtspenc.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/libavformat/rtspenc.c b/libavformat/rtspenc.c
index c1fc97ca8f..b7fa330f53 100644
--- a/libavformat/rtspenc.c
+++ b/libavformat/rtspenc.c
@@ -33,9 +33,21 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/avstring.h"
 #include "url.h"
+#include "libavutil/opt.h"
 
 #define SDP_MAX_SIZE 16384
 
+static const AVOption options[] = {
+    { NULL },
+};
+
+static const AVClass rtsp_muxer_class = {
+    .class_name = "RTSP muxer",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 int ff_rtsp_setup_output_streams(AVFormatContext *s, const char *addr)
 {
     RTSPState *rt = s->priv_data;
@@ -238,5 +250,6 @@ AVOutputFormat ff_rtsp_muxer = {
     rtsp_write_packet,
     rtsp_write_close,
     .flags = AVFMT_NOFILE | AVFMT_GLOBALHEADER,
+    .priv_class = &rtsp_muxer_class,
 };
 

From e2e29c62476f7d8f8851d0d51809ce2a152362f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sat, 21 May 2011 15:03:48 +0300
Subject: [PATCH 725/830] rtspenc: Add RTP muxer options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtsp.h    | 5 +++++
 libavformat/rtspenc.c | 2 ++
 libavformat/version.h | 2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavformat/rtsp.h b/libavformat/rtsp.h
index f5a7fada21..5eae6bf4f3 100644
--- a/libavformat/rtsp.h
+++ b/libavformat/rtsp.h
@@ -344,6 +344,11 @@ typedef struct RTSPState {
      * Do not begin to play the stream immediately.
      */
     int initial_pause;
+
+    /**
+     * Option flags for the chained RTP muxer.
+     */
+    int rtp_muxer_flags;
 } RTSPState;
 
 /**
diff --git a/libavformat/rtspenc.c b/libavformat/rtspenc.c
index b7fa330f53..b76b6adb99 100644
--- a/libavformat/rtspenc.c
+++ b/libavformat/rtspenc.c
@@ -34,10 +34,12 @@
 #include "libavutil/avstring.h"
 #include "url.h"
 #include "libavutil/opt.h"
+#include "rtpenc.h"
 
 #define SDP_MAX_SIZE 16384
 
 static const AVOption options[] = {
+    FF_RTP_FLAG_OPTS(RTSPState, rtp_muxer_flags),
     { NULL },
 };
 
diff --git a/libavformat/version.h b/libavformat/version.h
index ca61ab165f..bd7f3c0e48 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -25,7 +25,7 @@
 
 #define LIBAVFORMAT_VERSION_MAJOR 53
 #define LIBAVFORMAT_VERSION_MINOR  1
-#define LIBAVFORMAT_VERSION_MICRO  1
+#define LIBAVFORMAT_VERSION_MICRO  2
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
                                                LIBAVFORMAT_VERSION_MINOR, \

From f3f82296a407d3c4eada161aaec45b21a5334fc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 18 May 2011 15:41:38 +0300
Subject: [PATCH 726/830] movenc: Pass the RTP AVFormatContext to the SDP
 generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/movenc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index b313510b25..ccd39ffa84 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1354,12 +1354,12 @@ static int mov_write_uuid_tag_psp(AVIOContext *pb, MOVTrack *mov)
     return 0x34;
 }
 
-static int mov_write_udta_sdp(AVIOContext *pb, AVCodecContext *ctx, int index)
+static int mov_write_udta_sdp(AVIOContext *pb, AVFormatContext *ctx, int index)
 {
     char buf[1000] = "";
     int len;
 
-    ff_sdp_write_media(buf, sizeof(buf), ctx, NULL, NULL, 0, 0, NULL);
+    ff_sdp_write_media(buf, sizeof(buf), ctx->streams[0]->codec, NULL, NULL, 0, 0, ctx);
     av_strlcatf(buf, sizeof(buf), "a=control:streamid=%d\r\n", index);
     len = strlen(buf);
 
@@ -1387,7 +1387,7 @@ static int mov_write_trak_tag(AVIOContext *pb, MOVTrack *track, AVStream *st)
     if (track->mode == MODE_PSP)
         mov_write_uuid_tag_psp(pb,track);  // PSP Movies require this uuid box
     if (track->tag == MKTAG('r','t','p',' '))
-        mov_write_udta_sdp(pb, track->rtp_ctx->streams[0]->codec, track->trackID);
+        mov_write_udta_sdp(pb, track->rtp_ctx, track->trackID);
     if (track->enc->codec_type == AVMEDIA_TYPE_VIDEO && track->mode == MODE_MOV) {
         double sample_aspect_ratio = av_q2d(st->sample_aspect_ratio);
         if (0.0 != sample_aspect_ratio && 1.0 != sample_aspect_ratio)

From d16cccac98a250d53827fa0c82e429bf17070d0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sat, 21 May 2011 14:58:43 +0300
Subject: [PATCH 727/830] movenc: Add RTP muxer/hinter options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/movenc.c  | 2 ++
 libavformat/movenc.h  | 1 +
 libavformat/version.h | 2 +-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index ccd39ffa84..dcc5581443 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -34,6 +34,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/opt.h"
 #include "libavutil/dict.h"
+#include "rtpenc.h"
 
 #undef NDEBUG
 #include <assert.h>
@@ -41,6 +42,7 @@
 static const AVOption options[] = {
     { "movflags", "MOV muxer flags", offsetof(MOVMuxContext, flags), FF_OPT_TYPE_FLAGS, {.dbl = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "rtphint", "Add RTP hint tracks", 0, FF_OPT_TYPE_CONST, {.dbl = FF_MOV_FLAG_RTP_HINT}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
+    FF_RTP_FLAG_OPTS(MOVMuxContext, rtp_flags),
     { NULL },
 };
 
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index 69b6621711..39cdb39284 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -111,6 +111,7 @@ typedef struct MOVMuxContext {
     MOVTrack *tracks;
 
     int flags;
+    int rtp_flags;
 } MOVMuxContext;
 
 #define FF_MOV_FLAG_RTP_HINT 1
diff --git a/libavformat/version.h b/libavformat/version.h
index bd7f3c0e48..83fa431246 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -25,7 +25,7 @@
 
 #define LIBAVFORMAT_VERSION_MAJOR 53
 #define LIBAVFORMAT_VERSION_MINOR  1
-#define LIBAVFORMAT_VERSION_MICRO  2
+#define LIBAVFORMAT_VERSION_MICRO  3
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
                                                LIBAVFORMAT_VERSION_MINOR, \

From 62519c6139bd3d3e7f8004ad4e7d2d67e63b1c87 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 10 Jun 2011 11:24:04 +0200
Subject: [PATCH 728/830] lavc: bump minor after the addition of
 AVCodecContext.request_sample_fmt

---
 libavcodec/version.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/version.h b/libavcodec/version.h
index 5a2e0cce2f..100c06dfe6 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -21,8 +21,8 @@
 #define AVCODEC_VERSION_H
 
 #define LIBAVCODEC_VERSION_MAJOR 53
-#define LIBAVCODEC_VERSION_MINOR  6
-#define LIBAVCODEC_VERSION_MICRO  1
+#define LIBAVCODEC_VERSION_MINOR  7
+#define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \

From ace0a5e236364f5dd6f16195509da899bdd35d8f Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 10 Jun 2011 11:54:54 +0200
Subject: [PATCH 729/830] lavf: bump minor after the addition of fps_probe_size
 to AVFormatContext

---
 libavformat/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/version.h b/libavformat/version.h
index 203ac34039..50c39dc7d9 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -24,7 +24,7 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVFORMAT_VERSION_MAJOR 53
-#define LIBAVFORMAT_VERSION_MINOR  2
+#define LIBAVFORMAT_VERSION_MINOR  3
 #define LIBAVFORMAT_VERSION_MICRO  0
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \

From bd77a5e27c6701d68f671e702f4b4d57191f7b3c Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 10 Jun 2011 11:06:55 +0200
Subject: [PATCH 730/830] APIchanges: add entry for av_force_cpu_flags()
 addition

---
 doc/APIchanges | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/APIchanges b/doc/APIchanges
index 92ab57ecb8..c42b91d8c4 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-02 - 5ad38d - lavu 51.4.0 - av_force_cpu_flags()
+  Add av_cpu_flags() in libavutil/cpu.h.
+
 2011-06-06 - xxxxxx - lavfi 2.14.0 - AVFilterBufferRefAudioProps
   Remove AVFilterBufferRefAudioProps.size, and use nb_samples in
   avfilter_get_audio_buffer() and avfilter_default_get_audio_buffer() in

From e0ce9711aa01be2a1334756c648e569c72fb789f Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 10 Jun 2011 11:08:55 +0200
Subject: [PATCH 731/830] APIchanges: correctly interleave entries

---
 doc/APIchanges | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index c42b91d8c4..f5c7da7cde 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,8 +13,17 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
-2011-06-02 - 5ad38d - lavu 51.4.0 - av_force_cpu_flags()
-  Add av_cpu_flags() in libavutil/cpu.h.
+2011-06-xx - xxxxxxx - lavu 51.5.0 - AVMetadata
+  Move AVMetadata from lavf to lavu and rename it to
+  AVDictionary -- new installed header dict.h.
+  All av_metadata_* functions renamed to av_dict_*.
+
+2011-06-07 - a6703fa - lavu 51.4.0 - av_get_bytes_per_sample()
+  Add av_get_bytes_per_sample() in libavutil/samplefmt.h.
+  Deprecate av_get_bits_per_sample_fmt().
+
+2011-06-xx - xxxxxxx - lavu 51.3.0 - opt.h
+  Add av_opt_free convenience function.
 
 2011-06-06 - xxxxxx - lavfi 2.14.0 - AVFilterBufferRefAudioProps
   Remove AVFilterBufferRefAudioProps.size, and use nb_samples in
@@ -36,10 +45,20 @@ API changes, most recent first:
 2011-06-03 - xxxxxx - lavfi 2.12.0 - avfilter_link_free()
   Add avfilter_link_free() function.
 
+2011-06-02 - 5ad38d - lavu 51.4.0 - av_force_cpu_flags()
+  Add av_cpu_flags() in libavutil/cpu.h.
+
 2011-05-28 - xxxxxx - lavu 51.3.0 - pixdesc.h
   Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
   avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
 
+2011-05-28 - 0420bd7 - lavu 51.2.0 - pixdesc.h
+  Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
+  avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
+
+2011-05-25 - 30315a8 - lavf 53.1.0 - avformat.h
+  Add fps_probe_size to AVFormatContext.
+
 2011-05-22 - xxxxxx - lavf 53.2.0 - avformat.h
   Introduce avformat_alloc_output_context2() and deprecate
   avformat_alloc_output_context().
@@ -53,25 +72,6 @@ API changes, most recent first:
 2011-05-XX - XXXXXX - lavfi 2.6.0 - avcodec.h
   Add avfilter_get_video_buffer_ref_from_frame() to libavfilter/avcodec.h.
 
-2011-06-xx - xxxxxxx - lavu 51.5.0 - AVMetadata
-  Move AVMetadata from lavf to lavu and rename it to
-  AVDictionary -- new installed header dict.h.
-  All av_metadata_* functions renamed to av_dict_*.
-
-2011-06-07 - a6703fa - lavu 51.4.0 - av_get_bytes_per_sample()
-  Add av_get_bytes_per_sample() in libavutil/samplefmt.h.
-  Deprecate av_get_bits_per_sample_fmt().
-
-2011-06-xx - xxxxxxx - lavu 51.3.0 - opt.h
-  Add av_opt_free convenience function.
-
-2011-05-28 - 0420bd7 - lavu 51.2.0 - pixdesc.h
-  Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
-  avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
-
-2011-05-25 - 30315a8 - lavf 53.1.0 - avformat.h
-  Add fps_probe_size to AVFormatContext.
-
 2011-05-18 - 64150ff - lavc 53.4.0 - AVCodecContext.request_sample_fmt
   Add request_sample_fmt field to AVCodecContext.
 

From a5825b22a2dc6652c17bf33ebfab2719eed2e569 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 10 Jun 2011 11:50:55 +0200
Subject: [PATCH 732/830] APIchanges: remove duplicated entry

Remove duplicated entry regarding PIX_FMT_BGR48LE/BE formats.
---
 doc/APIchanges | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index f5c7da7cde..b189d353e9 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -156,9 +156,6 @@ API changes, most recent first:
     333e894 deprecate url_open_protocol
     e230705 deprecate url_poll and URLPollEntry
 
-2011-04-10 - lavu  50.40.0 - pixfmt.h
-  Add PIX_FMT_BGR48LE and PIX_FMT_BGR48BE pixel formats
-
 2011-04-08 - lavf 52.106.0 - avformat.h
   Minor avformat.h cleanup:
     a9bf9d8 deprecate av_guess_image2_codec

From 2caaa791a0d6e4a10bd4e9bbd5b88985f6a81454 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 10 Jun 2011 11:15:42 +0200
Subject: [PATCH 733/830] APIchanges: fill in dates and numbers

Since some minor bumps were not done, some entries show the same
libavutil minor (which represents the next bump after the corresponding
change).
---
 doc/APIchanges | 54 +++++++++++++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index b189d353e9..47e9df6315 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,42 +13,42 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
-2011-06-xx - xxxxxxx - lavu 51.5.0 - AVMetadata
+2011-06-09 - d9f80ea - lavu 51.8.0 - AVMetadata
   Move AVMetadata from lavf to lavu and rename it to
   AVDictionary -- new installed header dict.h.
   All av_metadata_* functions renamed to av_dict_*.
 
-2011-06-07 - a6703fa - lavu 51.4.0 - av_get_bytes_per_sample()
+2011-06-07 - a6703fa - lavu 51.8.0 - av_get_bytes_per_sample()
   Add av_get_bytes_per_sample() in libavutil/samplefmt.h.
   Deprecate av_get_bits_per_sample_fmt().
 
-2011-06-xx - xxxxxxx - lavu 51.3.0 - opt.h
+2011-06-xx - b39b062 - lavu 51.8.0 - opt.h
   Add av_opt_free convenience function.
 
-2011-06-06 - xxxxxx - lavfi 2.14.0 - AVFilterBufferRefAudioProps
+2011-06-06 - 95a0242 - lavfi 2.14.0 - AVFilterBufferRefAudioProps
   Remove AVFilterBufferRefAudioProps.size, and use nb_samples in
   avfilter_get_audio_buffer() and avfilter_default_get_audio_buffer() in
   place of size.
 
-2011-06-06 - xxxxxx - lavu 51.6.0 - av_samples_alloc()
+2011-06-06 - 0bc2cca - lavu 51.6.0 - av_samples_alloc()
   Switch nb_channels and nb_samples parameters order in
   av_samples_alloc().
 
-2011-06-06 - xxxxxx - lavu 51.5.0 - av_samples_*
+2011-06-06 - e1c7414 - lavu 51.5.0 - av_samples_*
   Change the data layout created by av_samples_fill_arrays() and
   av_samples_alloc().
 
-2011-06-06 - xxxxxx - lavfi 2.13.0 - vsrc_buffer.h
+2011-06-06 - 27bcf55 - lavfi 2.13.0 - vsrc_buffer.h
   Make av_vsrc_buffer_add_video_buffer_ref() accepts an additional
   flags parameter in input.
 
-2011-06-03 - xxxxxx - lavfi 2.12.0 - avfilter_link_free()
+2011-06-03 - e977ca2 - lavfi 2.12.0 - avfilter_link_free()
   Add avfilter_link_free() function.
 
-2011-06-02 - 5ad38d - lavu 51.4.0 - av_force_cpu_flags()
+2011-06-02 - 5ad38d9 - lavu 51.4.0 - av_force_cpu_flags()
   Add av_cpu_flags() in libavutil/cpu.h.
 
-2011-05-28 - xxxxxx - lavu 51.3.0 - pixdesc.h
+2011-05-28 - e71f260 - lavu 51.3.0 - pixdesc.h
   Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
   avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
 
@@ -56,52 +56,52 @@ API changes, most recent first:
   Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
   avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
 
-2011-05-25 - 30315a8 - lavf 53.1.0 - avformat.h
+2011-05-25 - 30315a8 - lavf 53.3.0 - avformat.h
   Add fps_probe_size to AVFormatContext.
 
-2011-05-22 - xxxxxx - lavf 53.2.0 - avformat.h
+2011-05-22 - 5ecdfd0 - lavf 53.2.0 - avformat.h
   Introduce avformat_alloc_output_context2() and deprecate
   avformat_alloc_output_context().
 
-2011-05-22 - xxxxxx - lavfi 2.10.0 - vsrc_buffer.h
+2011-05-22 - 83db719 - lavfi 2.10.0 - vsrc_buffer.h
   Make libavfilter/vsrc_buffer.h public.
 
-2011-05-XX - XXXXXX - lavfi 2.8.0 - avcodec.h
+2011-05-19 - c000a9f - lavfi 2.8.0 - avcodec.h
   Add av_vsrc_buffer_add_frame() to libavfilter/avcodec.h.
 
-2011-05-XX - XXXXXX - lavfi 2.6.0 - avcodec.h
+2011-05-14 - 9fdf772 - lavfi 2.6.0 - avcodec.h
   Add avfilter_get_video_buffer_ref_from_frame() to libavfilter/avcodec.h.
 
-2011-05-18 - 64150ff - lavc 53.4.0 - AVCodecContext.request_sample_fmt
+2011-05-18 - 64150ff - lavc 53.7.0 - AVCodecContext.request_sample_fmt
   Add request_sample_fmt field to AVCodecContext.
 
-2011-05-10 - 188dea1 - lavc 53.3.0 - avcodec.h
+2011-05-10 - 188dea1 - lavc 53.6.0 - avcodec.h
   Deprecate AVLPCType and the following fields in
   AVCodecContext: lpc_coeff_precision, prediction_order_method,
   min_partition_order, max_partition_order, lpc_type, lpc_passes.
   Corresponding FLAC encoder options should be used instead.
 
-2011-05-07 - xxxxxxx - lavfi 2.5.0 - avcodec.h
+2011-05-07 - 9fdf772 - lavfi 2.5.0 - avcodec.h
   Add libavfilter/avcodec.h header and avfilter_copy_frame_props()
   function.
 
-2011-05-07 - xxxxxxx - lavc 53.5.0 - AVFrame
+2011-05-07 - 18ded93 - lavc 53.5.0 - AVFrame
   Add format field to AVFrame.
 
-2011-05-07 - xxxxxxx - lavc 53.4.0 - AVFrame
+2011-05-07 - 22333a6 - lavc 53.4.0 - AVFrame
   Add width and height fields to AVFrame.
 
-2011-05-01 - xxxxxxx - lavfi 2.4.0 - avfilter.h
+2011-05-01 - 35fe66a - lavfi 2.4.0 - avfilter.h
   Rename AVFilterBufferRefVideoProps.pixel_aspect to
   sample_aspect_ratio.
 
-2011-05-01 - xxxxxxx - lavc 53.3.0 - AVFrame
+2011-05-01 - 77e9dee - lavc 53.3.0 - AVFrame
   Add a sample_aspect_ratio field to AVFrame.
 
-2011-05-01 - xxxxxxx - lavc 53.2.0 - AVFrame
+2011-05-01 - 1ba5727 - lavc 53.2.0 - AVFrame
   Add a pkt_pos field to AVFrame.
 
-2011-04-xx - xxxxxxx - lavu 51.2.0 - mem.h
+2011-04-29 - 35ceaa7 - lavu 51.2.0 - mem.h
   Add av_dynarray_add function for adding
   an element to a dynamic array.
 
@@ -204,7 +204,7 @@ API changes, most recent first:
 2011-03-25 - 34b47d7 - lavc 52.115.0 - AVCodecContext.audio_service_type
   Add audio_service_type field to AVCodecContext.
 
-2011-XX-XX - XXXXXXX - lavu  XX.XXX.X - pixfmt.h
+2011-03-17 - e309fdc - lavu 50.40.0 - pixfmt.h
   Add PIX_FMT_BGR48LE and PIX_FMT_BGR48BE pixel formats
 
 2011-03-02 - 863c471 - lavf  52.103.0 - av_pkt_dump2, av_pkt_dump_log2
@@ -273,10 +273,10 @@ API changes, most recent first:
 2011-02-02 - dfd2a00 - lavu 50.37.0 - log.h
   Make av_dlog public.
 
-2011-01-31 - X - lavfi 1.76.0 - vsrc_buffer
+2011-01-31 - 7b3ea55 - lavfi 1.76.0 - vsrc_buffer
   Add sample_aspect_ratio fields to vsrc_buffer arguments
 
-2011-01-31 - X - lavfi 1.75.0 - AVFilterLink sample_aspect_ratio
+2011-01-31 - 910b5b8 - lavfi 1.75.0 - AVFilterLink sample_aspect_ratio
   Add sample_aspect_ratio field to AVFilterLink.
 
 2011-01-15 - r26374 - lavfi 1.74.0 - AVFilterBufferRefAudioProps

From c6d7fc276ac07d53027d9dbd8bb4d0a33820a6aa Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 10 Jun 2011 12:32:25 +0200
Subject: [PATCH 734/830] APIchanges: remove duplicated entry

---
 doc/APIchanges | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 47e9df6315..fc68af0a45 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -52,10 +52,6 @@ API changes, most recent first:
   Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
   avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
 
-2011-05-28 - 0420bd7 - lavu 51.2.0 - pixdesc.h
-  Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
-  avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
-
 2011-05-25 - 30315a8 - lavf 53.3.0 - avformat.h
   Add fps_probe_size to AVFormatContext.
 

From c3819600e2975cb5dc8ca07e0ea41f8204324e4a Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Mon, 31 Jan 2011 00:07:41 +0100
Subject: [PATCH 735/830] lavfi: implement
 avfilter_get_audio_buffer_ref_from_arrays()

---
 doc/APIchanges         |  3 +++
 libavfilter/avfilter.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 libavfilter/avfilter.h | 21 +++++++++++++++++++--
 3 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index fc68af0a45..2d11ed5462 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-10 - xxxxxxx - lavfi 2.15.0 - avfilter_get_audio_buffer_ref_from_arrays
+  Add avfilter_get_audio_buffer_ref_from_arrays() to avfilter.h.
+
 2011-06-09 - d9f80ea - lavu 51.8.0 - AVMetadata
   Move AVMetadata from lavf to lavu and rename it to
   AVDictionary -- new installed header dict.h.
diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index 6d55350f7c..3b2e3ca2be 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -421,6 +421,48 @@ AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms,
     return ret;
 }
 
+AVFilterBufferRef *
+avfilter_get_audio_buffer_ref_from_arrays(uint8_t *data[8], int linesize[8], int perms,
+                                          int nb_samples, enum AVSampleFormat sample_fmt,
+                                          int64_t channel_layout, int planar)
+{
+    AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer));
+    AVFilterBufferRef *samplesref = av_mallocz(sizeof(AVFilterBufferRef));
+
+    if (!samples || !samplesref)
+        goto fail;
+
+    samplesref->buf = samples;
+    samplesref->buf->free = ff_avfilter_default_free_buffer;
+    if (!(samplesref->audio = av_mallocz(sizeof(AVFilterBufferRefAudioProps))))
+        goto fail;
+
+    samplesref->audio->nb_samples     = nb_samples;
+    samplesref->audio->channel_layout = channel_layout;
+    samplesref->audio->planar         = planar;
+
+    /* make sure the buffer gets read permission or it's useless for output */
+    samplesref->perms = perms | AV_PERM_READ;
+
+    samples->refcount = 1;
+    samplesref->type = AVMEDIA_TYPE_AUDIO;
+    samplesref->format = sample_fmt;
+
+    memcpy(samples->data,        data,     sizeof(samples->data));
+    memcpy(samples->linesize,    linesize, sizeof(samples->linesize));
+    memcpy(samplesref->data,     data,     sizeof(samplesref->data));
+    memcpy(samplesref->linesize, linesize, sizeof(samplesref->linesize));
+
+    return samplesref;
+
+fail:
+    if (samplesref && samplesref->audio)
+        av_freep(&samplesref->audio);
+    av_freep(&samplesref);
+    av_freep(&samples);
+    return NULL;
+}
+
 int avfilter_request_frame(AVFilterLink *link)
 {
     FF_DPRINTF_START(NULL, request_frame); ff_dlog_link(NULL, link, 1);
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index ac954ca198..4f5ed36004 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,8 +26,8 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR 14
-#define LIBAVFILTER_VERSION_MICRO  1
+#define LIBAVFILTER_VERSION_MINOR 15
+#define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \
@@ -694,6 +694,23 @@ AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms,
                                              enum AVSampleFormat sample_fmt, int nb_samples,
                                              int64_t channel_layout, int planar);
 
+/**
+ * Create an audio buffer reference wrapped around an already
+ * allocated samples buffer.
+ *
+ * @param data           pointers to the samples plane buffers
+ * @param linesize       linesize for the samples plane buffers
+ * @param perms          the required access permissions
+ * @param nb_samples     number of samples per channel
+ * @param sample_fmt     the format of each sample in the buffer to allocate
+ * @param channel_layout the channel layout of the buffer
+ * @param planar         audio data layout - planar or packed
+ */
+AVFilterBufferRef *
+avfilter_get_audio_buffer_ref_from_arrays(uint8_t *data[8], int linesize[8], int perms,
+                                          int nb_samples, enum AVSampleFormat sample_fmt,
+                                          int64_t channel_layout, int planar);
+
 /**
  * Request an input frame from the filter at the other end of the link.
  *

From 989184fea4854720caef35347992499ba8033195 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 12 Feb 2011 17:56:50 +0100
Subject: [PATCH 736/830] lavfi: use
 avfilter_get_audio_buffer_ref_from_arrays() in defaults.c

Use avfilter_get_audio_buffer_ref_from_arrays() in
avfilter_default_get_audio_buffer(), simplify.
---
 libavfilter/defaults.c | 48 ++++++++++++------------------------------
 1 file changed, 14 insertions(+), 34 deletions(-)

diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c
index ce8f3cd0da..c39ed64048 100644
--- a/libavfilter/defaults.c
+++ b/libavfilter/defaults.c
@@ -84,47 +84,27 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per
                                                      enum AVSampleFormat sample_fmt, int nb_samples,
                                                      int64_t channel_layout, int planar)
 {
-    AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer));
-    AVFilterBufferRef *ref = NULL;
+    AVFilterBufferRef *samplesref = NULL;
+    int linesize[8];
+    uint8_t *data[8];
     int nb_channels = av_get_channel_layout_nb_channels(channel_layout);
 
-    if (!samples || !(ref = av_mallocz(sizeof(AVFilterBufferRef))))
-        goto fail;
-
-    ref->buf                   = samples;
-    ref->format                = sample_fmt;
-
-    ref->audio = av_mallocz(sizeof(AVFilterBufferRefAudioProps));
-    if (!ref->audio)
-        goto fail;
-
-    ref->audio->channel_layout = channel_layout;
-    ref->audio->nb_samples     = nb_samples;
-    ref->audio->planar         = planar;
-
-    /* make sure the buffer gets read permission or it's useless for output */
-    ref->perms = perms | AV_PERM_READ;
-
-    samples->refcount   = 1;
-    samples->free       = ff_avfilter_default_free_buffer;
-
     /* Calculate total buffer size, round to multiple of 16 to be SIMD friendly */
-    if (av_samples_alloc(samples->data, samples->linesize,
+    if (av_samples_alloc(data, linesize,
                          nb_channels, nb_samples, sample_fmt,
                          planar, 16) < 0)
-        goto fail;
+        return NULL;
 
-    memcpy(ref->data,     samples->data,     sizeof(ref->data));
-    memcpy(ref->linesize, samples->linesize, sizeof(ref->linesize));
+    samplesref =
+        avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms,
+                                                  nb_samples, sample_fmt,
+                                                  channel_layout, planar);
+    if (!samplesref) {
+        av_free(data[0]);
+        return NULL;
+    }
 
-    return ref;
-
-fail:
-    if (ref)
-        av_free(ref->audio);
-    av_free(ref);
-    av_free(samples);
-    return NULL;
+    return samplesref;
 }
 
 void avfilter_default_start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref)

From 79a0ec1af4817bb7b989803b9f460d1e4acaf7b7 Mon Sep 17 00:00:00 2001
From: Mina Nagy Zaki <mnzaki@gmail.com>
Date: Wed, 8 Jun 2011 19:24:25 +0300
Subject: [PATCH 737/830] lavfi: avfilter_merge_formats: handle case where
 inputs are same

This fixes a double-free crash when both arguments are the same list,
caused by the two merge_ref() calls at the end of the (useless) merge.
---
 libavfilter/formats.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavfilter/formats.c b/libavfilter/formats.c
index ec7fca3817..58593fcce0 100644
--- a/libavfilter/formats.c
+++ b/libavfilter/formats.c
@@ -44,6 +44,8 @@ AVFilterFormats *avfilter_merge_formats(AVFilterFormats *a, AVFilterFormats *b)
     AVFilterFormats *ret;
     unsigned i, j, k = 0;
 
+    if (a == b) return a;
+
     ret = av_mallocz(sizeof(AVFilterFormats));
 
     /* merge list of formats */

From 16c9e6717e8b9524bf9126153e336db2f386514c Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 10 Jun 2011 13:03:22 +0200
Subject: [PATCH 738/830] APIchanges: fill hash for the
 avfilter_get_audio_buffer_ref_from_arrays addition

---
 doc/APIchanges | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 2d11ed5462..aa08b9d4c9 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,7 +13,7 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
-2011-06-10 - xxxxxxx - lavfi 2.15.0 - avfilter_get_audio_buffer_ref_from_arrays
+2011-06-10 - c381960 - lavfi 2.15.0 - avfilter_get_audio_buffer_ref_from_arrays
   Add avfilter_get_audio_buffer_ref_from_arrays() to avfilter.h.
 
 2011-06-09 - d9f80ea - lavu 51.8.0 - AVMetadata

From f74e5b76b1033e5224d6a7b5906e05cecd2b3302 Mon Sep 17 00:00:00 2001
From: Kieran Kunhya <kierank@ob-encoder.com>
Date: Wed, 8 Jun 2011 01:26:20 -0500
Subject: [PATCH 739/830] adts: Adjust frame size mask to follow the
 specification.

This fixes ADTS detection for at least one sample.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavformat/aacdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/aacdec.c b/libavformat/aacdec.c
index 0dc1c5ce0f..6a184c77d7 100644
--- a/libavformat/aacdec.c
+++ b/libavformat/aacdec.c
@@ -44,7 +44,7 @@ static int adts_aac_probe(AVProbeData *p)
             uint32_t header = AV_RB16(buf2);
             if((header&0xFFF6) != 0xFFF0)
                 break;
-            fsize = (AV_RB32(buf2+3)>>13) & 0x8FFF;
+            fsize = (AV_RB32(buf2 + 3) >> 13) & 0x1FFF;
             if(fsize < 7)
                 break;
             buf2 += fsize;

From aecc596a651b981256f91ee619aaf282e455d99e Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Fri, 10 Jun 2011 12:58:08 -0400
Subject: [PATCH 740/830] Update copyright year for ac3enc_opts_template.c.

The code was originally committed to Libav on March 25, 2011.
---
 libavcodec/ac3enc_opts_template.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/ac3enc_opts_template.c b/libavcodec/ac3enc_opts_template.c
index 699c1b5982..e16e0d0878 100644
--- a/libavcodec/ac3enc_opts_template.c
+++ b/libavcodec/ac3enc_opts_template.c
@@ -1,6 +1,6 @@
 /*
  * AC-3 encoder options
- * Copyright (c) 2010 Justin Ruggles <justin.ruggles@gmail.com>
+ * Copyright (c) 2011 Justin Ruggles <justin.ruggles@gmail.com>
  *
  * This file is part of Libav.
  *

From 919d7a345a7e9044c3cdc89cf06dc521a1b01c6c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 7 Jun 2011 13:18:12 +0200
Subject: [PATCH 741/830] Replace DEBUG_SEEK/DEBUG_SI + av_log combinations by
 av_dlog.

---
 libavformat/avidec.c  | 43 ++++++++++++++-------------------------
 libavformat/ffmdec.c  | 14 +++----------
 libavformat/mpeg.c    | 12 +++--------
 libavformat/mpegenc.c |  1 -
 libavformat/mpegts.c  |  2 --
 libavformat/nsvdec.c  |  2 --
 libavformat/utils.c   | 47 ++++++++++++++-----------------------------
 7 files changed, 36 insertions(+), 85 deletions(-)

diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index a915cc6588..dacd230613 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -19,9 +19,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-//#define DEBUG
-//#define DEBUG_SEEK
-
 #include <strings.h>
 #include "libavutil/intreadwrite.h"
 #include "libavutil/bswap.h"
@@ -141,10 +138,8 @@ static int read_braindead_odml_indx(AVFormatContext *s, int frame_num){
     int64_t last_pos= -1;
     int64_t filesize= avio_size(s->pb);
 
-#ifdef DEBUG_SEEK
-    av_log(s, AV_LOG_ERROR, "longs_pre_entry:%d index_type:%d entries_in_use:%d chunk_id:%X base:%16"PRIX64"\n",
-        longs_pre_entry,index_type, entries_in_use, chunk_id, base);
-#endif
+    av_dlog(s, "longs_pre_entry:%d index_type:%d entries_in_use:%d chunk_id:%X base:%16"PRIX64"\n",
+            longs_pre_entry,index_type, entries_in_use, chunk_id, base);
 
     if(stream_id >= s->nb_streams || stream_id < 0)
         return -1;
@@ -176,9 +171,8 @@ static int read_braindead_odml_indx(AVFormatContext *s, int frame_num){
             int key= len >= 0;
             len &= 0x7FFFFFFF;
 
-#ifdef DEBUG_SEEK
-            av_log(s, AV_LOG_ERROR, "pos:%"PRId64", len:%X\n", pos, len);
-#endif
+            av_dlog(s, "pos:%"PRId64", len:%X\n", pos, len);
+
             if(pb->eof_reached)
                 return -1;
 
@@ -1134,10 +1128,8 @@ static int avi_read_idx1(AVFormatContext *s, int size)
         flags = avio_rl32(pb);
         pos = avio_rl32(pb);
         len = avio_rl32(pb);
-#if defined(DEBUG_SEEK)
-        av_log(s, AV_LOG_DEBUG, "%d: tag=0x%x flags=0x%x pos=0x%x len=%d/",
-               i, tag, flags, pos, len);
-#endif
+        av_dlog(s, "%d: tag=0x%x flags=0x%x pos=0x%x len=%d/",
+                i, tag, flags, pos, len);
         if(i==0 && pos > avi->movi_list)
             avi->movi_list= 0; //FIXME better check
         pos += avi->movi_list;
@@ -1149,9 +1141,8 @@ static int avi_read_idx1(AVFormatContext *s, int size)
         st = s->streams[index];
         ast = st->priv_data;
 
-#if defined(DEBUG_SEEK)
-        av_log(s, AV_LOG_DEBUG, "%d cum_len=%"PRId64"\n", len, ast->cum_len);
-#endif
+        av_dlog(s, "%d cum_len=%"PRId64"\n", len, ast->cum_len);
+
         if(pb->eof_reached)
             return -1;
 
@@ -1206,22 +1197,18 @@ static int avi_load_index(AVFormatContext *s)
 
     if (avio_seek(pb, avi->movi_end, SEEK_SET) < 0)
         goto the_end; // maybe truncated file
-#ifdef DEBUG_SEEK
-    av_log(s, AV_LOG_DEBUG, "movi_end=0x%"PRIx64"\n", avi->movi_end);
-#endif
+    av_dlog(s, "movi_end=0x%"PRIx64"\n", avi->movi_end);
     for(;;) {
         if (pb->eof_reached)
             break;
         tag = avio_rl32(pb);
         size = avio_rl32(pb);
-#ifdef DEBUG_SEEK
-        av_log(s, AV_LOG_DEBUG, "tag=%c%c%c%c size=0x%x\n",
-               tag & 0xff,
-               (tag >> 8) & 0xff,
-               (tag >> 16) & 0xff,
-               (tag >> 24) & 0xff,
-               size);
-#endif
+        av_dlog(s, "tag=%c%c%c%c size=0x%x\n",
+                 tag        & 0xff,
+                (tag >>  8) & 0xff,
+                (tag >> 16) & 0xff,
+                (tag >> 24) & 0xff,
+                size);
         switch(tag) {
         case MKTAG('i', 'd', 'x', '1'):
             if (avi_read_idx1(s, size) < 0)
diff --git a/libavformat/ffmdec.c b/libavformat/ffmdec.c
index 8a6226a104..dfd86cb28b 100644
--- a/libavformat/ffmdec.c
+++ b/libavformat/ffmdec.c
@@ -163,8 +163,6 @@ static int ffm_read_data(AVFormatContext *s,
     return size1 - size;
 }
 
-//#define DEBUG_SEEK
-
 /* ensure that acutal seeking happens between FFM_PACKET_SIZE
    and file_size - FFM_PACKET_SIZE */
 static void ffm_seek1(AVFormatContext *s, int64_t pos1)
@@ -175,9 +173,7 @@ static void ffm_seek1(AVFormatContext *s, int64_t pos1)
 
     pos = FFMIN(pos1, ffm->file_size - FFM_PACKET_SIZE);
     pos = FFMAX(pos, FFM_PACKET_SIZE);
-#ifdef DEBUG_SEEK
-    av_log(s, AV_LOG_DEBUG, "seek to %"PRIx64" -> %"PRIx64"\n", pos1, pos);
-#endif
+    av_dlog(s, "seek to %"PRIx64" -> %"PRIx64"\n", pos1, pos);
     avio_seek(pb, pos, SEEK_SET);
 }
 
@@ -189,9 +185,7 @@ static int64_t get_dts(AVFormatContext *s, int64_t pos)
     ffm_seek1(s, pos);
     avio_skip(pb, 4);
     dts = avio_rb64(pb);
-#ifdef DEBUG_SEEK
-    av_log(s, AV_LOG_DEBUG, "dts=%0.6f\n", dts / 1000000.0);
-#endif
+    av_dlog(s, "dts=%0.6f\n", dts / 1000000.0);
     return dts;
 }
 
@@ -465,9 +459,7 @@ static int ffm_seek(AVFormatContext *s, int stream_index, int64_t wanted_pts, in
     int64_t pts_min, pts_max, pts;
     double pos1;
 
-#ifdef DEBUG_SEEK
-    av_log(s, AV_LOG_DEBUG, "wanted_pts=%0.6f\n", wanted_pts / 1000000.0);
-#endif
+    av_dlog(s, "wanted_pts=%0.6f\n", wanted_pts / 1000000.0);
     /* find the position using linear interpolation (better than
        dichotomy in typical cases) */
     pos_min = FFM_PACKET_SIZE;
diff --git a/libavformat/mpeg.c b/libavformat/mpeg.c
index 5b4996e825..496b9d45f5 100644
--- a/libavformat/mpeg.c
+++ b/libavformat/mpeg.c
@@ -23,8 +23,6 @@
 #include "internal.h"
 #include "mpeg.h"
 
-//#define DEBUG_SEEK
-
 #undef NDEBUG
 #include <assert.h>
 
@@ -589,9 +587,7 @@ static int64_t mpegps_read_dts(AVFormatContext *s, int stream_index,
     for(;;) {
         len = mpegps_read_pes_header(s, &pos, &startcode, &pts, &dts);
         if (len < 0) {
-#ifdef DEBUG_SEEK
-            av_log(s, AV_LOG_DEBUG, "none (ret=%d)\n", len);
-#endif
+            av_dlog(s, "none (ret=%d)\n", len);
             return AV_NOPTS_VALUE;
         }
         if (startcode == s->streams[stream_index]->id &&
@@ -600,10 +596,8 @@ static int64_t mpegps_read_dts(AVFormatContext *s, int stream_index,
         }
         avio_skip(s->pb, len);
     }
-#ifdef DEBUG_SEEK
-    av_log(s, AV_LOG_DEBUG, "pos=0x%"PRIx64" dts=0x%"PRIx64" %0.3f\n",
-           pos, dts, dts / 90000.0);
-#endif
+    av_dlog(s, "pos=0x%"PRIx64" dts=0x%"PRIx64" %0.3f\n",
+            pos, dts, dts / 90000.0);
     *ppos = pos;
     return dts;
 }
diff --git a/libavformat/mpegenc.c b/libavformat/mpegenc.c
index 98169abcf5..820c5bd5e1 100644
--- a/libavformat/mpegenc.c
+++ b/libavformat/mpegenc.c
@@ -25,7 +25,6 @@
 #include "mpeg.h"
 
 #define MAX_PAYLOAD_SIZE 4096
-//#define DEBUG_SEEK
 
 #undef NDEBUG
 #include <assert.h>
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index bf81b20b5c..e9b8b3513a 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -19,8 +19,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-//#define DEBUG
-//#define DEBUG_SEEK
 //#define USE_SYNCPOINT_SEARCH
 
 #include "libavutil/crc.h"
diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index 08338c4609..9adb2f4729 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -22,9 +22,7 @@
 #include "riff.h"
 #include "libavutil/dict.h"
 
-//#define DEBUG
 //#define DEBUG_DUMP_INDEX // XXX dumbdriving-271.nsv breaks with it commented!!
-//#define DEBUG_SEEK
 #define CHECK_SUBSEQUENT_NSVS
 //#define DISABLE_AUDIO
 
diff --git a/libavformat/utils.c b/libavformat/utils.c
index b189bfb622..dc3b9d8fb8 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1421,8 +1421,6 @@ int av_index_search_timestamp(AVStream *st, int64_t wanted_timestamp,
                                      wanted_timestamp, flags);
 }
 
-#define DEBUG_SEEK
-
 int av_seek_frame_binary(AVFormatContext *s, int stream_index, int64_t target_ts, int flags){
     AVInputFormat *avif= s->iformat;
     int64_t av_uninit(pos_min), av_uninit(pos_max), pos, pos_limit;
@@ -1434,9 +1432,7 @@ int av_seek_frame_binary(AVFormatContext *s, int stream_index, int64_t target_ts
     if (stream_index < 0)
         return -1;
 
-#ifdef DEBUG_SEEK
-    av_log(s, AV_LOG_DEBUG, "read_seek: %d %"PRId64"\n", stream_index, target_ts);
-#endif
+    av_dlog(s, "read_seek: %d %"PRId64"\n", stream_index, target_ts);
 
     ts_max=
     ts_min= AV_NOPTS_VALUE;
@@ -1453,10 +1449,8 @@ int av_seek_frame_binary(AVFormatContext *s, int stream_index, int64_t target_ts
         if(e->timestamp <= target_ts || e->pos == e->min_distance){
             pos_min= e->pos;
             ts_min= e->timestamp;
-#ifdef DEBUG_SEEK
-            av_log(s, AV_LOG_DEBUG, "using cached pos_min=0x%"PRIx64" dts_min=%"PRId64"\n",
-                   pos_min,ts_min);
-#endif
+            av_dlog(s, "using cached pos_min=0x%"PRIx64" dts_min=%"PRId64"\n",
+                    pos_min,ts_min);
         }else{
             assert(index==0);
         }
@@ -1469,10 +1463,8 @@ int av_seek_frame_binary(AVFormatContext *s, int stream_index, int64_t target_ts
             pos_max= e->pos;
             ts_max= e->timestamp;
             pos_limit= pos_max - e->min_distance;
-#ifdef DEBUG_SEEK
-            av_log(s, AV_LOG_DEBUG, "using cached pos_max=0x%"PRIx64" pos_limit=0x%"PRIx64" dts_max=%"PRId64"\n",
-                   pos_max,pos_limit, ts_max);
-#endif
+            av_dlog(s, "using cached pos_max=0x%"PRIx64" pos_limit=0x%"PRIx64" dts_max=%"PRId64"\n",
+                    pos_max,pos_limit, ts_max);
         }
     }
 
@@ -1494,9 +1486,7 @@ int64_t av_gen_search(AVFormatContext *s, int stream_index, int64_t target_ts, i
     int64_t start_pos, filesize;
     int no_change;
 
-#ifdef DEBUG_SEEK
-    av_log(s, AV_LOG_DEBUG, "gen_seek: %d %"PRId64"\n", stream_index, target_ts);
-#endif
+    av_dlog(s, "gen_seek: %d %"PRId64"\n", stream_index, target_ts);
 
     if(ts_min == AV_NOPTS_VALUE){
         pos_min = s->data_offset;
@@ -1538,11 +1528,8 @@ int64_t av_gen_search(AVFormatContext *s, int stream_index, int64_t target_ts, i
 
     no_change=0;
     while (pos_min < pos_limit) {
-#ifdef DEBUG_SEEK
-        av_log(s, AV_LOG_DEBUG, "pos_min=0x%"PRIx64" pos_max=0x%"PRIx64" dts_min=%"PRId64" dts_max=%"PRId64"\n",
-               pos_min, pos_max,
-               ts_min, ts_max);
-#endif
+        av_dlog(s, "pos_min=0x%"PRIx64" pos_max=0x%"PRIx64" dts_min=%"PRId64" dts_max=%"PRId64"\n",
+                pos_min, pos_max, ts_min, ts_max);
         assert(pos_limit <= pos_max);
 
         if(no_change==0){
@@ -1569,11 +1556,9 @@ int64_t av_gen_search(AVFormatContext *s, int stream_index, int64_t target_ts, i
             no_change++;
         else
             no_change=0;
-#ifdef DEBUG_SEEK
-        av_log(s, AV_LOG_DEBUG, "%"PRId64" %"PRId64" %"PRId64" / %"PRId64" %"PRId64" %"PRId64" target:%"PRId64" limit:%"PRId64" start:%"PRId64" noc:%d\n",
-               pos_min, pos, pos_max, ts_min, ts, ts_max, target_ts, pos_limit,
-               start_pos, no_change);
-#endif
+        av_dlog(s, "%"PRId64" %"PRId64" %"PRId64" / %"PRId64" %"PRId64" %"PRId64" target:%"PRId64" limit:%"PRId64" start:%"PRId64" noc:%d\n",
+                pos_min, pos, pos_max, ts_min, ts, ts_max, target_ts,
+                pos_limit, start_pos, no_change);
         if(ts == AV_NOPTS_VALUE){
             av_log(s, AV_LOG_ERROR, "read_timestamp() failed in the middle\n");
             return -1;
@@ -1592,13 +1577,13 @@ int64_t av_gen_search(AVFormatContext *s, int stream_index, int64_t target_ts, i
 
     pos = (flags & AVSEEK_FLAG_BACKWARD) ? pos_min : pos_max;
     ts  = (flags & AVSEEK_FLAG_BACKWARD) ?  ts_min :  ts_max;
-#ifdef DEBUG_SEEK
+#if 1
     pos_min = pos;
     ts_min = read_timestamp(s, stream_index, &pos_min, INT64_MAX);
     pos_min++;
     ts_max = read_timestamp(s, stream_index, &pos_min, INT64_MAX);
-    av_log(s, AV_LOG_DEBUG, "pos=0x%"PRIx64" %"PRId64"<=%"PRId64"<=%"PRId64"\n",
-           pos, ts_min, target_ts, ts_max);
+    av_dlog(s, "pos=0x%"PRIx64" %"PRId64"<=%"PRId64"<=%"PRId64"\n",
+            pos, ts_min, target_ts, ts_max);
 #endif
     *ts_ret= ts;
     return pos;
@@ -2651,9 +2636,7 @@ AVProgram *av_new_program(AVFormatContext *ac, int id)
     AVProgram *program=NULL;
     int i;
 
-#ifdef DEBUG_SI
-    av_log(ac, AV_LOG_DEBUG, "new_program: id=0x%04x\n", id);
-#endif
+    av_dlog(ac, "new_program: id=0x%04x\n", id);
 
     for(i=0; i<ac->nb_programs; i++)
         if(ac->programs[i]->id == id)

From 6c031a3338d49dd61cf34fd703631f5a47205912 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Fri, 10 Jun 2011 02:40:10 +0200
Subject: [PATCH 742/830] h264: Fix 10-bit H.264 x86 chroma v loopfilter asm.

The tc variable was not splatted correctly.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/x86/h264_deblock_10bit.asm | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index f5a13f1250..baac725eec 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -836,6 +836,13 @@ DEBLOCK_LUMA_INTRA avx
     mova [r0+2*r1], m2
 %endmacro
 
+%macro CHROMA_V_LOAD_TC 2
+    movd        %1, [%2]
+    punpcklbw   %1, %1
+    punpcklwd   %1, %1
+    psraw       %1, 6
+%endmacro
+
 %macro DEBLOCK_CHROMA 1
 ;-----------------------------------------------------------------------------
 ; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
@@ -854,7 +861,7 @@ cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16)
     LOAD_AB     m4, m5, r2, r3
     LOAD_MASK   m0, m1, m2, m3, m4, m5, m7, m6, m4
     pxor        m4, m4
-    LOAD_TC     m6, r4
+    CHROMA_V_LOAD_TC m6, r4
     psubw       m6, [pw_3]
     pmaxsw      m6, m4
     pand        m7, m6

From c149843b5a484e73baffa0e070cfa08fe05943dd Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Thu, 9 Jun 2011 18:20:19 -0700
Subject: [PATCH 743/830] H.264: Fix high bit depth explicit biweight

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/h264dsp_template.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
index 91162ea900..be88f7487f 100644
--- a/libavcodec/h264dsp_template.c
+++ b/libavcodec/h264dsp_template.c
@@ -63,6 +63,7 @@ static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_
     pixel *dst = (pixel*)_dst; \
     pixel *src = (pixel*)_src; \
     stride /= sizeof(pixel); \
+    offset <<= (BIT_DEPTH-8); \
     offset = ((offset + 1) | 1) << log2_denom; \
     for(y=0; y<H; y++, dst += stride, src += stride){ \
         op_scale2(0); \

From 103278f7b0b037a4a6184865ca9b8d021ec9be85 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 19:16:39 -0700
Subject: [PATCH 744/830] libavutil/swscale: YUV444P10/YUV444P9 support.

Also add missing glue code for recently added YUV422P10 formats
to swscale.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavutil/pixdesc.c           | 46 +++++++++++++++++++++++++++++++++++
 libavutil/pixfmt.h            |  6 +++++
 libswscale/swscale.c          | 14 ++++++++++-
 libswscale/swscale_internal.h | 12 +++++++++
 libswscale/utils.c            |  6 +++++
 5 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index 77584755d6..efc7c7ea0e 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -878,6 +878,52 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
         },
         .flags = PIX_FMT_BE,
     },
+    [PIX_FMT_YUV444P10LE] = {
+        .name = "yuv444p10le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,9},        /* Y */
+            {1,1,1,0,9},        /* U */
+            {2,1,1,0,9},        /* V */
+        },
+    },
+    [PIX_FMT_YUV444P10BE] = {
+        .name = "yuv444p10be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,9},        /* Y */
+            {1,1,1,0,9},        /* U */
+            {2,1,1,0,9},        /* V */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_YUV444P9LE] = {
+        .name = "yuv444p9le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,8},        /* Y */
+            {1,1,1,0,8},        /* U */
+            {2,1,1,0,8},        /* V */
+        },
+    },
+    [PIX_FMT_YUV444P9BE] = {
+        .name = "yuv444p9be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,9},        /* Y */
+            {1,1,1,0,9},        /* U */
+            {2,1,1,0,9},        /* V */
+        },
+        .flags = PIX_FMT_BE,
+    },
     [PIX_FMT_DXVA2_VLD] = {
         .name = "dxva2_vld",
         .log2_chroma_w = 1,
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index d88775f462..e852d85519 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -141,6 +141,10 @@ enum PixelFormat {
     PIX_FMT_YUV420P10LE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
     PIX_FMT_YUV422P10BE,///< planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
     PIX_FMT_YUV422P10LE,///< planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
+    PIX_FMT_YUV444P9BE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
+    PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
+    PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
+    PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
     PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
 };
 
@@ -166,8 +170,10 @@ enum PixelFormat {
 #define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE)
 
 #define PIX_FMT_YUV420P9  PIX_FMT_NE(YUV420P9BE , YUV420P9LE)
+#define PIX_FMT_YUV444P9  PIX_FMT_NE(YUV444P9BE , YUV444P9LE)
 #define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE)
 #define PIX_FMT_YUV422P10 PIX_FMT_NE(YUV422P10BE, YUV422P10LE)
+#define PIX_FMT_YUV444P10 PIX_FMT_NE(YUV444P10BE, YUV444P10LE)
 #define PIX_FMT_YUV420P16 PIX_FMT_NE(YUV420P16BE, YUV420P16LE)
 #define PIX_FMT_YUV422P16 PIX_FMT_NE(YUV422P16BE, YUV422P16LE)
 #define PIX_FMT_YUV444P16 PIX_FMT_NE(YUV444P16BE, YUV444P16LE)
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index bc076dd3f2..ba89a0f4be 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1821,7 +1821,7 @@ find_c_packed_planar_out_funcs(SwsContext *c,
     } else if (is16BPS(dstFormat)) {
         *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
     } else if (is9_OR_10BPS(dstFormat)) {
-        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
+        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
         } else {
             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
@@ -2161,9 +2161,15 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
         case PIX_FMT_PAL8     :
         case PIX_FMT_BGR4_BYTE:
         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
+        case PIX_FMT_YUV444P9BE:
         case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
+        case PIX_FMT_YUV444P9LE:
         case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
+        case PIX_FMT_YUV444P10BE:
+        case PIX_FMT_YUV422P10BE:
         case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
+        case PIX_FMT_YUV422P10LE:
+        case PIX_FMT_YUV444P10LE:
         case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
         case PIX_FMT_YUV420P16BE:
         case PIX_FMT_YUV422P16BE:
@@ -2219,9 +2225,15 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
     c->lumToYV12 = NULL;
     c->alpToYV12 = NULL;
     switch (srcFormat) {
+    case PIX_FMT_YUV444P9BE:
     case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
+    case PIX_FMT_YUV444P9LE:
     case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
+    case PIX_FMT_YUV444P10BE:
+    case PIX_FMT_YUV422P10BE:
     case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
+    case PIX_FMT_YUV444P10LE:
+    case PIX_FMT_YUV422P10LE:
     case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
     case PIX_FMT_YUYV422  :
     case PIX_FMT_YUV420P16BE:
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 013eef9e31..483842e866 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -355,6 +355,12 @@ const char *sws_format_name(enum PixelFormat format);
 #define is9_OR_10BPS(x) (           \
            (x)==PIX_FMT_YUV420P9LE  \
         || (x)==PIX_FMT_YUV420P9BE  \
+        || (x)==PIX_FMT_YUV444P9BE  \
+        || (x)==PIX_FMT_YUV444P9LE  \
+        || (x)==PIX_FMT_YUV422P10BE \
+        || (x)==PIX_FMT_YUV422P10LE \
+        || (x)==PIX_FMT_YUV444P10BE \
+        || (x)==PIX_FMT_YUV444P10LE \
         || (x)==PIX_FMT_YUV420P10LE \
         || (x)==PIX_FMT_YUV420P10BE \
     )
@@ -373,12 +379,18 @@ const char *sws_format_name(enum PixelFormat format);
 #define isPlanarYUV(x)  (           \
         isPlanar8YUV(x)             \
         || (x)==PIX_FMT_YUV420P9LE  \
+        || (x)==PIX_FMT_YUV444P9LE  \
         || (x)==PIX_FMT_YUV420P10LE \
+        || (x)==PIX_FMT_YUV422P10LE \
+        || (x)==PIX_FMT_YUV444P10LE \
         || (x)==PIX_FMT_YUV420P16LE \
         || (x)==PIX_FMT_YUV422P16LE \
         || (x)==PIX_FMT_YUV444P16LE \
         || (x)==PIX_FMT_YUV420P9BE  \
+        || (x)==PIX_FMT_YUV444P9BE  \
         || (x)==PIX_FMT_YUV420P10BE \
+        || (x)==PIX_FMT_YUV422P10BE \
+        || (x)==PIX_FMT_YUV444P10BE \
         || (x)==PIX_FMT_YUV420P16BE \
         || (x)==PIX_FMT_YUV422P16BE \
         || (x)==PIX_FMT_YUV444P16BE \
diff --git a/libswscale/utils.c b/libswscale/utils.c
index d552330ec5..213bf3a043 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -110,12 +110,18 @@ const char *swscale_license(void)
         || (x)==PIX_FMT_MONOWHITE   \
         || (x)==PIX_FMT_MONOBLACK   \
         || (x)==PIX_FMT_YUV420P9LE    \
+        || (x)==PIX_FMT_YUV444P9LE    \
         || (x)==PIX_FMT_YUV420P10LE   \
+        || (x)==PIX_FMT_YUV422P10LE   \
+        || (x)==PIX_FMT_YUV444P10LE   \
         || (x)==PIX_FMT_YUV420P16LE   \
         || (x)==PIX_FMT_YUV422P16LE   \
         || (x)==PIX_FMT_YUV444P16LE   \
         || (x)==PIX_FMT_YUV420P9BE    \
+        || (x)==PIX_FMT_YUV444P9BE    \
         || (x)==PIX_FMT_YUV420P10BE   \
+        || (x)==PIX_FMT_YUV444P10BE   \
+        || (x)==PIX_FMT_YUV422P10BE   \
         || (x)==PIX_FMT_YUV420P16BE   \
         || (x)==PIX_FMT_YUV422P16BE   \
         || (x)==PIX_FMT_YUV444P16BE   \

From 0fd82fedc9b279cd12f71dd7de954cfb60fa9dea Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 10 Jun 2011 20:25:49 +0200
Subject: [PATCH 745/830] pixfmt: Replace 9/10bit deprecation by a technical
 explanation.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavutil/pixfmt.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 95972f9373..6e4f1610d7 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -136,7 +136,9 @@ enum PixelFormat {
     PIX_FMT_BGR48BE,   ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as big-endian
     PIX_FMT_BGR48LE,   ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as little-endian
 
-    //the following 6 formats are deprecated and should be replaced by PIX_FMT_YUV420P16* with the bpp stored seperately
+    //the following 6 formats have the disadvantage of needing 1 format for each bit depth, thus
+    //If you want to support multiple bit depths, then using PIX_FMT_YUV420P16* with the bpp stored seperately
+    //is better
     PIX_FMT_YUV420P9BE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
     PIX_FMT_YUV420P9LE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
     PIX_FMT_YUV420P10BE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian

From 19d824e47373594739bb9a05cd4e7edbc441b173 Mon Sep 17 00:00:00 2001
From: Alex Converse <aconverse@google.com>
Date: Tue, 26 Apr 2011 09:08:26 -0700
Subject: [PATCH 746/830] bitstream: Properly promote av_reverse values before
 shifting.

---
 libavcodec/bitstream.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/bitstream.c b/libavcodec/bitstream.c
index 97d5b49105..b593db55ce 100644
--- a/libavcodec/bitstream.c
+++ b/libavcodec/bitstream.c
@@ -116,10 +116,10 @@ static int alloc_table(VLC *vlc, int size, int use_static)
 }
 
 static av_always_inline uint32_t bitswap_32(uint32_t x) {
-    return av_reverse[x&0xFF]<<24
-         | av_reverse[(x>>8)&0xFF]<<16
-         | av_reverse[(x>>16)&0xFF]<<8
-         | av_reverse[x>>24];
+    return (uint32_t)av_reverse[x&0xFF]<<24
+         | (uint32_t)av_reverse[(x>>8)&0xFF]<<16
+         | (uint32_t)av_reverse[(x>>16)&0xFF]<<8
+         | (uint32_t)av_reverse[x>>24];
 }
 
 typedef struct {

From d3cef0a85b7d3fd133a3349341646fe15aeb0030 Mon Sep 17 00:00:00 2001
From: Dave Badia <dbadia@gmail.com>
Date: Fri, 10 Jun 2011 21:18:26 +0200
Subject: [PATCH 747/830] Extract rotation in MOV metadata

---
 libavformat/mov.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 00330a6f27..4eb8c74a34 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1926,6 +1926,10 @@ static int mov_read_tkhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     sc->width = width >> 16;
     sc->height = height >> 16;
 
+    if (display_matrix[0][0] == -65536 && display_matrix[1][1] == -65536) {
+         av_metadata_set2(&st->metadata, "rotate", "180", 0);
+    }
+
     // transform the display width/height according to the matrix
     // skip this if the display matrix is the default identity matrix
     // or if it is rotating the picture, ex iPhone 3GS

From 36204ed88e6c5eb54f36df2dea94b20e786b215f Mon Sep 17 00:00:00 2001
From: Joseph Artsimovich <joseph@mirriad.com>
Date: Fri, 10 Jun 2011 21:38:14 +0200
Subject: [PATCH 748/830] dnxhd: Rename tables

---
 libavcodec/dnxhddata.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/libavcodec/dnxhddata.c b/libavcodec/dnxhddata.c
index 2625a9be0c..f628a642a5 100644
--- a/libavcodec/dnxhddata.c
+++ b/libavcodec/dnxhddata.c
@@ -243,11 +243,11 @@ static const uint8_t dnxhd_1238_ac_index_flag[257] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 };
 
-static const uint16_t dnxhd_1238_run_codes[62] = {
+static const uint16_t dnxhd_1235_1238_1241_run_codes[62] = {
     0, 4, 10, 11, 24, 25, 26, 27, 56, 57, 58, 59, 120, 242, 486, 487, 488, 489, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
 };
 
-static const uint8_t dnxhd_1238_run_bits[62] = {
+static const uint8_t dnxhd_1235_1238_1241_run_bits[62] = {
     1, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
 };
 
@@ -255,34 +255,34 @@ static const uint8_t dnxhd_1238_run[62] = {
     1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
 };
 
-static const uint8_t dnxhd_1241_dc_codes[14] = {
+static const uint8_t dnxhd_1235_1241_dc_codes[14] = {
     10, 62, 11, 12, 13, 0, 1, 2, 3, 4, 14, 30, 126, 127,
 };
 
-static const uint8_t dnxhd_1241_dc_bits[14] = {
+static const uint8_t dnxhd_1235_1241_dc_bits[14] = {
     4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 7, 7,
 };
-static const uint16_t dnxhd_1241_ac_codes[257] = {
+static const uint16_t dnxhd_1235_1241_ac_codes[257] = {
     0, 1, 4, 10, 11, 24, 25, 26, 54, 55, 56, 57, 116, 117, 118, 119, 240, 241, 242, 243, 244, 245, 492, 493, 494, 495, 496, 497, 498, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 4054, 4055, 4056, 4057, 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, 4066, 4067, 4068, 4069, 8140, 8141, 8142, 8143, 8144, 8145, 8146, 8147, 8148, 8149, 8150, 8151, 8152, 8153, 8154, 8155, 8156, 8157, 16316, 16317, 16318, 16319, 16320, 16321, 16322, 16323, 16324, 16325, 16326, 16327, 16328, 16329, 16330, 16331, 16332, 16333, 16334, 16335, 16336, 16337, 32676, 32677, 32678, 32679, 32680, 32681, 32682, 32683, 32684, 32685, 32686, 32687, 32688, 32689, 32690, 32691, 32692, 32693, 32694, 32695, 32696, 32697, 32698, 32699, 32700, 32701, 32702, 32703, 32704, 32705, 32706, 32707, 32708, 65418, 65419, 65420, 65421, 65422, 65423, 65424, 65425, 65426, 65427, 65428, 65429, 65430, 65431, 65432, 65433, 65434, 65435, 65436, 65437, 65438, 65439, 65440, 65441, 65442, 65443, 65444, 65445, 65446, 65447, 65448, 65449, 65450, 65451, 65452, 65453, 65454, 65455, 65456, 65457, 65458, 65459, 65460, 65461, 65462, 65463, 65464, 65465, 65466, 65467, 65468, 65469, 65470, 65471, 65472, 65473, 65474, 65475, 65476, 65477, 65478, 65479, 65480, 65481, 65482, 65483, 65484, 65485, 65486, 65487, 65488, 65489, 65490, 65491, 65492, 65493, 65494, 65495, 65496, 65497, 65498, 65499, 65500, 65501, 65502, 65503, 65504, 65505, 65506, 65507, 65508, 65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65518, 65519, 65520, 65521, 65522, 65523, 65524, 65525, 65526, 65527, 65528, 65529, 65530, 65531, 65532, 65533, 65534, 65535,
 };
 
-static const uint8_t dnxhd_1241_ac_bits[257] = {
+static const uint8_t dnxhd_1235_1241_ac_bits[257] = {
     2, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
 };
 
-static const uint8_t dnxhd_1241_ac_level[257] = {
+static const uint8_t dnxhd_1235_1241_ac_level[257] = {
     1, 1, 2, 3, 0, 4, 5, 2, 6, 7, 8, 3, 9, 10, 11, 4, 12, 13, 14, 15, 16, 5, 17, 18, 19, 20, 21, 6, 7, 22, 23, 24, 25, 26, 27, 28, 29, 8, 9, 30, 31, 32, 33, 34, 35, 36, 37, 38, 10, 11, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 12, 13, 14, 15, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 1, 16, 17, 18, 19, 64, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 25, 26, 27, 28, 29, 30, 31, 32, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
 };
 
-static const uint8_t dnxhd_1241_ac_run_flag[257] = {
+static const uint8_t dnxhd_1235_1241_ac_run_flag[257] = {
     0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 };
 
-static const uint8_t dnxhd_1241_ac_index_flag[257] = {
+static const uint8_t dnxhd_1235_1241_ac_index_flag[257] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 };
 
-static const uint8_t dnxhd_1241_run[62] = {
+static const uint8_t dnxhd_1235_1241_run[62] = {
     1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 17, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
 };
 
@@ -352,14 +352,14 @@ const CIDEntry ff_dnxhd_cid_table[] = {
       dnxhd_1238_dc_codes, dnxhd_1238_dc_bits,
       dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_level,
       dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag,
-      dnxhd_1238_run_codes, dnxhd_1238_run_bits, dnxhd_1238_run,
+      dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run,
       { 175, 185, 220, 365, 440 } },
     { 1241, 1920, 1080, 1, 917504, 458752, 6, 10,
       dnxhd_1241_luma_weight, dnxhd_1241_chroma_weight,
-      dnxhd_1241_dc_codes, dnxhd_1241_dc_bits,
-      dnxhd_1241_ac_codes, dnxhd_1241_ac_bits, dnxhd_1241_ac_level,
-      dnxhd_1241_ac_run_flag, dnxhd_1241_ac_index_flag,
-      dnxhd_1238_run_codes, dnxhd_1238_run_bits, dnxhd_1241_run,
+      dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits,
+      dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level,
+      dnxhd_1235_1241_ac_run_flag, dnxhd_1235_1241_ac_index_flag,
+      dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run,
       { 185, 220 } },
     { 1242, 1920, 1080, 1, 606208, 303104, 4, 8,
       dnxhd_1242_luma_weight, dnxhd_1242_chroma_weight,
@@ -373,7 +373,7 @@ const CIDEntry ff_dnxhd_cid_table[] = {
       dnxhd_1238_dc_codes, dnxhd_1238_dc_bits,
       dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_level,
       dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag,
-      dnxhd_1238_run_codes, dnxhd_1238_run_bits, dnxhd_1238_run,
+      dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run,
       { 185, 220 } },
     { 1251, 1280,  720, 0, 458752, 458752, 4, 8,
       dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight,

From 39dbe9b6b8fdd587793583c8296fecf94b3aee58 Mon Sep 17 00:00:00 2001
From: Etienne Buira <etienne.buira.lists@free.fr>
Date: Sat, 11 Jun 2011 00:04:40 +0200
Subject: [PATCH 749/830] Fix -t option for formats which hold dts and no pts

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffmpeg.c                   |   2 +-
 tests/ref/fate/lmlm4-demux | 331 -------------------------------------
 2 files changed, 1 insertion(+), 332 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index a8ac9bb209..35395a99a4 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -2727,7 +2727,7 @@ static int transcode(AVFormatContext **output_files,
 
         /* finish if recording time exhausted */
         if (recording_time != INT64_MAX &&
-            (pkt.pts != AV_NOPTS_VALUE || pkt.dts != AV_NOPTS_VALUE ?
+            (pkt.pts != AV_NOPTS_VALUE ?
                 av_compare_ts(pkt.pts, ist->st->time_base, recording_time + start_time, (AVRational){1, 1000000})
                     :
                 av_compare_ts(ist->pts, AV_TIME_BASE_Q, recording_time + start_time, (AVRational){1, 1000000})
diff --git a/tests/ref/fate/lmlm4-demux b/tests/ref/fate/lmlm4-demux
index f322300eee..1d0d59f54d 100644
--- a/tests/ref/fate/lmlm4-demux
+++ b/tests/ref/fate/lmlm4-demux
@@ -214,334 +214,3 @@
 0, 267267, 1327, 0x7d15307c
 1, 267840, 768, 0x8d766d40
 0, 270270, 1225, 0x1b5d0f5f
-0, 273273, 1173, 0x840efed5
-0, 276276, 1215, 0xa8e0035e
-0, 279279, 1295, 0x142918ca
-0, 282282, 1144, 0xf50cef50
-0, 285285, 1527, 0x7d13bd9d
-0, 288288, 5609, 0x1ae1921d
-0, 291291, 1303, 0xabdc264f
-0, 294294, 1419, 0x878169bf
-0, 297297, 972, 0x00c4a257
-0, 300300, 1277, 0x87d520cf
-0, 303303, 1014, 0x5946b4ee
-0, 306306, 1177, 0x124e0e23
-0, 309309, 1402, 0x8e6363cc
-0, 312312, 1171, 0x9bdaeda2
-0, 315315, 1389, 0x2db53b22
-0, 318318, 1056, 0xd1c3de3e
-0, 321321, 1320, 0x1ea142c7
-0, 324324, 1250, 0x33612229
-0, 327327, 1477, 0xb9648b48
-0, 330330, 1522, 0x5352c318
-0, 333333, 1391, 0x5e9157e0
-0, 336336, 5545, 0x569e64c1
-0, 339339, 1354, 0xdb39469e
-0, 342342, 1302, 0x79912b5d
-0, 345345, 1065, 0x4befcdd2
-0, 348348, 1408, 0x7d2f65a2
-0, 351351, 1727, 0x9cac0398
-0, 354354, 1590, 0xa321b563
-0, 357357, 1039, 0xfa35cabf
-0, 360360, 1184, 0xb332fde7
-0, 363363, 669, 0xb10e3783
-0, 366366, 784, 0x57275e09
-0, 369369, 1051, 0xe072cd33
-0, 372372, 1119, 0x635ee9ee
-0, 375375, 1147, 0x3916f981
-0, 378378, 1086, 0x306ef895
-0, 381381, 827, 0x213f7aef
-0, 384384, 5525, 0x19157827
-0, 387387, 1044, 0xb661abc5
-0, 390390, 1143, 0x032e1109
-0, 393393, 1460, 0x5a2f9503
-0, 396396, 1178, 0xd038141f
-0, 399399, 1004, 0x410ec3b2
-0, 402402, 1089, 0xc89af8c9
-0, 405405, 1367, 0x52085e0a
-0, 408408, 1115, 0x8bb2ee7f
-0, 411411, 1325, 0xc2e05647
-0, 414414, 1295, 0x213951c9
-0, 417417, 1054, 0xbb8bdaae
-0, 420420, 1210, 0x431122bd
-0, 423423, 1400, 0x47526fcc
-0, 426426, 1188, 0x19770b07
-0, 429429, 1301, 0x437161c8
-0, 432432, 5281, 0xc0c92b55
-0, 435435, 840, 0x67da7b2f
-0, 438438, 1094, 0x3fd6d944
-0, 441441, 832, 0x0eda74bc
-0, 444444, 1096, 0x3300da7b
-0, 447447, 1018, 0xa208c971
-0, 450450, 1389, 0x1167724c
-0, 453453, 1411, 0xe3be666b
-0, 456456, 1294, 0xa8f35cc6
-0, 459459, 1232, 0xfd0d20fd
-0, 462462, 1252, 0xadd83a26
-0, 465465, 844, 0xcbaf6a55
-0, 468468, 979, 0x78d9b241
-0, 471471, 1057, 0x6743e16c
-0, 474474, 776, 0xfedd6615
-0, 477477, 1158, 0xa39fee34
-0, 480480, 5288, 0x5f26ee02
-0, 483483, 1029, 0xa681bee8
-0, 486486, 1106, 0xa68dea33
-0, 489489, 844, 0x42fd83ec
-0, 492492, 779, 0xb5006759
-0, 495495, 951, 0xec13af4f
-0, 498498, 1011, 0x90e5c86e
-0, 501501, 892, 0x4db48ca4
-0, 504504, 804, 0x59bf73a7
-0, 507507, 1001, 0x10c2b3ff
-0, 510510, 879, 0x65c57eaf
-0, 513513, 1320, 0x80815836
-0, 516516, 1448, 0xaf457b3b
-0, 519519, 1168, 0x65b9f96a
-0, 522522, 1002, 0x053fafb9
-0, 525525, 1101, 0x2d30c3d5
-0, 528528, 5314, 0x87cee383
-0, 531531, 1305, 0xb19035db
-0, 534534, 1240, 0xdc6a0a65
-0, 537537, 1067, 0x9c88ba67
-0, 540540, 823, 0x2f736a43
-0, 543543, 1183, 0x2ef9f3c9
-0, 546546, 899, 0x3fcc8d11
-0, 549549, 886, 0xccec8d49
-0, 552552, 1190, 0x2d020fa1
-0, 555555, 1017, 0x0776b627
-0, 558558, 1202, 0xbdd808d5
-0, 561561, 998, 0x64c7c246
-0, 564564, 1200, 0x9d6e2289
-0, 567567, 895, 0xa8a68d80
-0, 570570, 748, 0xe61a49fb
-0, 573573, 929, 0x30168b50
-0, 576576, 5276, 0xceb2edf2
-0, 579579, 1127, 0xab43ddc3
-0, 582582, 1028, 0xaacfbff5
-0, 585585, 914, 0xb63c8fb0
-0, 588588, 1067, 0xbdacd1ed
-0, 591591, 1109, 0x6792ddec
-0, 594594, 1310, 0x71bc4da2
-0, 597597, 1098, 0xc464de9b
-0, 600600, 1018, 0x6833b875
-0, 603603, 1210, 0x44faf34b
-0, 606606, 1200, 0x9ee816f6
-0, 609609, 1461, 0xc76b7d2b
-0, 612612, 829, 0x006677e6
-0, 615615, 1145, 0xc769fb13
-0, 618618, 1292, 0xb63225f5
-0, 621621, 1252, 0x0e2a2626
-0, 624624, 5257, 0x3877eca1
-0, 627627, 952, 0x7f708d25
-0, 630630, 1125, 0x140cd81b
-0, 633633, 1095, 0x3025dade
-0, 636636, 1388, 0xd7494d4e
-0, 639639, 1124, 0x0c48ee92
-0, 642642, 1556, 0xa0749ee2
-0, 645645, 1461, 0xe5fd7d7f
-0, 648648, 903, 0x07a58303
-0, 651651, 1049, 0x4b6cd03b
-0, 654654, 1044, 0x5f47cb48
-0, 657657, 1253, 0xba281c6a
-0, 660660, 1618, 0xed7cd040
-0, 663663, 981, 0x2926b6f4
-0, 666666, 1560, 0xa0e1ab73
-0, 669669, 1479, 0x41a77e88
-0, 672672, 5222, 0xc2dbd182
-0, 675675, 925, 0x967580dd
-0, 678678, 1284, 0x5b7822e0
-0, 681681, 1512, 0xe84da1e0
-0, 684684, 1514, 0xc38bb09e
-0, 687687, 1224, 0x8752228e
-0, 690690, 1296, 0xcf053c03
-0, 693693, 1117, 0x9a81e659
-0, 696696, 1090, 0x003ed687
-0, 699699, 1196, 0x3a510937
-0, 702702, 1075, 0x05eec8d4
-0, 705705, 1048, 0x3b19cb96
-0, 708708, 944, 0xaad89770
-0, 711711, 960, 0x94649e4c
-0, 714714, 1079, 0x530ddaba
-0, 717717, 1150, 0x0339e696
-0, 720720, 5189, 0xb8dac0bf
-0, 723723, 1129, 0x3b2cd64d
-0, 726726, 962, 0xe9df9a07
-0, 729729, 1113, 0xc6ccddb2
-0, 732732, 1069, 0xf589d4a4
-0, 735735, 889, 0x5f7b8762
-0, 738738, 863, 0xe9c36be4
-0, 741741, 1021, 0xcfb5a737
-0, 744744, 1048, 0x203ac9ff
-0, 747747, 1223, 0x3e30fe35
-0, 750750, 814, 0x59c076fc
-0, 753753, 1157, 0x0dcf0bd0
-0, 756756, 1691, 0xdd030547
-0, 759759, 1700, 0x7641fb7e
-0, 762762, 1791, 0x57ac147b
-0, 765765, 2008, 0x3d4483ca
-0, 768768, 4579, 0x874aa75b
-0, 771771, 1647, 0xeddef621
-0, 774774, 1999, 0x61d4a23a
-0, 777777, 1572, 0x1c3ae6e1
-0, 780780, 1803, 0xb31c3a11
-0, 783783, 1919, 0xccbf64e3
-0, 786786, 1720, 0xa4d010e5
-0, 789789, 1721, 0x87ee0c7b
-0, 792792, 1626, 0x8211f3d0
-0, 795795, 1675, 0xef8a0b3d
-0, 798798, 1609, 0x8731ce06
-0, 801801, 1691, 0xcf24038b
-0, 804804, 1637, 0x21d8e1b2
-0, 807807, 1546, 0xc597a700
-0, 810810, 1518, 0xb944bc11
-0, 813813, 1403, 0x999e59a8
-0, 816816, 2467, 0xe69f2507
-0, 819819, 531, 0x3c7cea7e
-0, 822822, 555, 0xdf20fb22
-0, 825825, 500, 0xebeee00d
-0, 828828, 446, 0x664cc711
-0, 831831, 521, 0xf223df4b
-0, 834834, 559, 0x4dc60028
-0, 837837, 593, 0xec440ba9
-0, 840840, 557, 0xef0100b1
-0, 843843, 602, 0x7b1cfd88
-0, 846846, 566, 0x77700a1d
-0, 849849, 523, 0x3df7eb64
-0, 852852, 482, 0x5da1dba9
-0, 855855, 541, 0x9c8ff3d7
-0, 858858, 572, 0x3e1204b2
-0, 861861, 549, 0x0921fe3d
-0, 864864, 2429, 0xba4fe5a8
-0, 867867, 495, 0xc35ade54
-0, 870870, 453, 0xcc66c9dc
-0, 873873, 421, 0x3aa7ce8f
-0, 876876, 448, 0x56c6d3d7
-0, 879879, 478, 0x4131d467
-0, 882882, 497, 0xac3ce3ca
-0, 885885, 470, 0x41b9d9d3
-0, 888888, 454, 0x44c2d956
-0, 891891, 460, 0x6629db01
-0, 894894, 488, 0x6be2dd68
-0, 897897, 512, 0xda4cf116
-0, 900900, 550, 0x6e990da9
-0, 903903, 561, 0x81180e5e
-0, 906906, 689, 0xe58a5a9a
-0, 909909, 548, 0xfa1417a9
-0, 912912, 2832, 0x942495a5
-0, 915915, 610, 0x6b201ab9
-0, 918918, 1015, 0x5f36b3f9
-0, 921921, 870, 0x14e48f0c
-0, 924924, 716, 0xf4034b52
-0, 927927, 763, 0xcbf4694e
-0, 930930, 778, 0xb9396764
-0, 933933, 831, 0x31999005
-0, 936936, 877, 0xc95e977f
-0, 939939, 836, 0xb56c7d61
-0, 942942, 853, 0x2d5980cf
-0, 945945, 861, 0x25629295
-0, 948948, 897, 0x0ff78a5f
-0, 951951, 1016, 0x4dd8cdfd
-0, 954954, 1117, 0x763f06c4
-0, 957957, 984, 0xcf7bc906
-0, 960960, 2750, 0xd428962d
-0, 963963, 995, 0x5cbdd6a4
-0, 966966, 894, 0xc42b9e25
-0, 969969, 1028, 0xdf8ad906
-0, 972972, 1059, 0x4c49f0cc
-0, 975975, 1122, 0x8880eed8
-0, 978978, 1007, 0xa9b4c243
-0, 981981, 1055, 0x6051dcd6
-0, 984984, 1293, 0xc3b32fa5
-0, 987987, 1101, 0xf986f9af
-0, 990990, 1272, 0x13883127
-0, 993993, 1037, 0xb97cebff
-0, 996996, 980, 0x0931d807
-0, 999999, 928, 0xbc3eb30b
-0, 1003002, 1068, 0x62d9e8de
-0, 1006005, 852, 0x9278a49a
-0, 1009008, 2841, 0x3091d12d
-0, 1012011, 931, 0x60f6c26e
-0, 1015014, 949, 0x31b9c856
-0, 1018017, 835, 0xfe018775
-0, 1021020, 779, 0x85356cd7
-0, 1024023, 748, 0x862756bf
-0, 1027026, 768, 0x0b7d645c
-0, 1030029, 786, 0x7c196f5b
-0, 1033032, 716, 0x4e8252cc
-0, 1036035, 671, 0x0b2d3023
-0, 1039038, 708, 0x3b2b4f25
-0, 1042041, 786, 0x523d670e
-0, 1045044, 680, 0x329142ec
-0, 1048047, 703, 0x841b456c
-0, 1051050, 660, 0x5cf332f1
-0, 1054053, 681, 0xcd7b3915
-0, 1057056, 2445, 0x27660ecb
-0, 1060059, 667, 0xf3d53d2a
-0, 1063062, 652, 0xe2b037b0
-0, 1066065, 695, 0x200248fc
-0, 1069068, 659, 0x7f6434c5
-0, 1072071, 682, 0x8d243afb
-0, 1075074, 701, 0x16e6476f
-0, 1078077, 636, 0x319a3236
-0, 1081080, 679, 0x81fa41f9
-0, 1084083, 740, 0xb32850af
-0, 1087086, 694, 0xe3f832c2
-0, 1090089, 681, 0x8174353f
-0, 1093092, 757, 0xebbe5a1f
-0, 1096095, 683, 0x9b46383c
-0, 1099098, 816, 0xd41e6bdf
-0, 1102101, 1058, 0x6170d2e6
-0, 1105104, 2489, 0x58fb28e1
-0, 1108107, 804, 0xb3037da8
-0, 1111110, 1053, 0x81ffc0a8
-0, 1114113, 868, 0xf73583cb
-0, 1117116, 875, 0xfa5d85bd
-0, 1120119, 723, 0x0714418d
-0, 1123122, 670, 0xd04333a1
-0, 1126125, 854, 0x370e730d
-0, 1129128, 794, 0x3d8a5e3c
-0, 1132131, 836, 0xebe26aa7
-0, 1135134, 871, 0x1da58c5e
-0, 1138137, 827, 0xda1e6ccb
-0, 1141140, 805, 0x10ad6a44
-0, 1144143, 831, 0x826f6fc9
-0, 1147146, 832, 0xb2517364
-0, 1150149, 887, 0x11bf8a3f
-0, 1153152, 2718, 0x26a8a174
-0, 1156155, 805, 0x4d0179f9
-0, 1159158, 699, 0x176c4f45
-0, 1162161, 758, 0xc1fc5b16
-0, 1165164, 707, 0x161b4891
-0, 1168167, 733, 0x99b554c0
-0, 1171170, 671, 0xccee2f89
-0, 1174173, 762, 0xd6416c9d
-0, 1177176, 721, 0x2ad94f0c
-0, 1180179, 727, 0x6280572e
-0, 1183182, 856, 0x0a7b797e
-0, 1186185, 843, 0xc64288aa
-0, 1189188, 877, 0x6d1c945d
-0, 1192191, 780, 0x4ba464e8
-0, 1195194, 808, 0xb3087cca
-0, 1198197, 870, 0x75809930
-0, 1201200, 2919, 0x5a80f685
-0, 1204203, 1027, 0xc98add3d
-0, 1207206, 1003, 0x0d88bd54
-0, 1210209, 1189, 0xb2f91ec7
-0, 1213212, 1320, 0x5acc4db3
-0, 1216215, 1381, 0xbd585feb
-0, 1219218, 1378, 0xe1a656f0
-0, 1222221, 1398, 0x88b57a5e
-0, 1225224, 1449, 0x1c737698
-0, 1228227, 1420, 0x6f0f80cd
-0, 1231230, 1032, 0x2d16d643
-0, 1234233, 1275, 0x38844729
-0, 1237236, 1112, 0x300207ea
-0, 1240239, 1105, 0xa2b700be
-0, 1243242, 1283, 0x08d04bef
-0, 1246245, 1056, 0xf795d994
-0, 1249248, 3202, 0xebf07050
-0, 1252251, 1034, 0x1099dbe5
-0, 1255254, 922, 0x88be9edc
-0, 1258257, 1050, 0xd3d7eb96
-0, 1261260, 979, 0x8de6b302
-0, 1264263, 1053, 0x5de2eca8

From 7aa59471813f49f434fbc6460475c0871e80cef6 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 11 Jun 2011 04:18:00 +0200
Subject: [PATCH 750/830] Revert "ac3: there was no libav in 2010 thus this
 code cannot be from  libav."

This reverts commit 7b8ec38d28cc99c4017471c6981fc1206732b468.

The copyright date was wrong.
---
 libavcodec/ac3enc_opts_template.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/ac3enc_opts_template.c b/libavcodec/ac3enc_opts_template.c
index 17f70ab799..e16e0d0878 100644
--- a/libavcodec/ac3enc_opts_template.c
+++ b/libavcodec/ac3enc_opts_template.c
@@ -2,20 +2,20 @@
  * AC-3 encoder options
  * Copyright (c) 2011 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of FFmpeg.
+ * This file is part of Libav.
  *
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 

From f5901fd392889dabe0d2eb9005e62ac667655b2f Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 11 Jun 2011 11:15:40 +0200
Subject: [PATCH 751/830] avfiltergraph: use meaningful error codes

---
 libavfilter/avfiltergraph.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/libavfilter/avfiltergraph.c b/libavfilter/avfiltergraph.c
index bdf22b3df9..d19fc59f00 100644
--- a/libavfilter/avfiltergraph.c
+++ b/libavfilter/avfiltergraph.c
@@ -90,7 +90,7 @@ int ff_avfilter_graph_check_validity(AVFilterGraph *graph, AVClass *log_ctx)
                 av_log(log_ctx, AV_LOG_ERROR,
                        "Input pad \"%s\" for the filter \"%s\" of type \"%s\" not connected to any source\n",
                        filt->input_pads[j].name, filt->name, filt->filter->name);
-                return -1;
+                return AVERROR(EINVAL);
             }
         }
 
@@ -99,7 +99,7 @@ int ff_avfilter_graph_check_validity(AVFilterGraph *graph, AVClass *log_ctx)
                 av_log(log_ctx, AV_LOG_ERROR,
                        "Output pad \"%s\" for the filter \"%s\" of type \"%s\" not connected to any destination\n",
                        filt->output_pads[j].name, filt->name, filt->filter->name);
-                return -1;
+                return AVERROR(EINVAL);
             }
         }
     }
@@ -178,7 +178,7 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
                         av_log(log_ctx, AV_LOG_ERROR,
                                "Impossible to convert between the formats supported by the filter "
                                "'%s' and the filter '%s'\n", link->src->name, link->dst->name);
-                        return -1;
+                        return AVERROR(EINVAL);
                     }
                 }
             }
@@ -216,9 +216,11 @@ static void pick_formats(AVFilterGraph *graph)
 
 int ff_avfilter_graph_config_formats(AVFilterGraph *graph, AVClass *log_ctx)
 {
+    int ret;
+
     /* find supported formats from sub-filters, and merge along links */
-    if (query_formats(graph, log_ctx))
-        return -1;
+    if ((ret = query_formats(graph, log_ctx)) < 0)
+        return ret;
 
     /* Once everything is merged, it's possible that we'll still have
      * multiple valid media format choices. We pick the first one. */

From d468ed032169c8007ed24cb2adfbee3a842eb742 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 11 Jun 2011 11:41:49 +0200
Subject: [PATCH 752/830] lavfi: fix signature for avfilter_graph_parse() and
 avfilter_graph_config()

Require "void *" rather than "AVClass *" for the log context type.
---
 libavfilter/avfilter.h      | 2 +-
 libavfilter/avfiltergraph.c | 2 +-
 libavfilter/avfiltergraph.h | 4 ++--
 libavfilter/graphparser.c   | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 4f5ed36004..64d76cc0c9 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -27,7 +27,7 @@
 
 #define LIBAVFILTER_VERSION_MAJOR  2
 #define LIBAVFILTER_VERSION_MINOR 15
-#define LIBAVFILTER_VERSION_MICRO  0
+#define LIBAVFILTER_VERSION_MICRO  1
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \
diff --git a/libavfilter/avfiltergraph.c b/libavfilter/avfiltergraph.c
index d19fc59f00..60d529ba73 100644
--- a/libavfilter/avfiltergraph.c
+++ b/libavfilter/avfiltergraph.c
@@ -229,7 +229,7 @@ int ff_avfilter_graph_config_formats(AVFilterGraph *graph, AVClass *log_ctx)
     return 0;
 }
 
-int avfilter_graph_config(AVFilterGraph *graphctx, AVClass *log_ctx)
+int avfilter_graph_config(AVFilterGraph *graphctx, void *log_ctx)
 {
     int ret;
 
diff --git a/libavfilter/avfiltergraph.h b/libavfilter/avfiltergraph.h
index 8013b46f5b..23a7c5138f 100644
--- a/libavfilter/avfiltergraph.h
+++ b/libavfilter/avfiltergraph.h
@@ -76,7 +76,7 @@ int avfilter_graph_create_filter(AVFilterContext **filt_ctx, AVFilter *filt,
  * @param log_ctx context used for logging
  * @return 0 in case of success, a negative AVERROR code otherwise
  */
-int avfilter_graph_config(AVFilterGraph *graphctx, AVClass *log_ctx);
+int avfilter_graph_config(AVFilterGraph *graphctx, void *log_ctx);
 
 /**
  * Free a graph, destroy its links, and set *graph to NULL.
@@ -118,6 +118,6 @@ typedef struct AVFilterInOut {
  */
 int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
                          AVFilterInOut *inputs, AVFilterInOut *outputs,
-                         AVClass *log_ctx);
+                         void *log_ctx);
 
 #endif /* AVFILTER_AVFILTERGRAPH_H */
diff --git a/libavfilter/graphparser.c b/libavfilter/graphparser.c
index 4f11529900..b7a2232270 100644
--- a/libavfilter/graphparser.c
+++ b/libavfilter/graphparser.c
@@ -329,7 +329,7 @@ static int parse_outputs(const char **buf, AVFilterInOut **curr_inputs,
 
 int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
                          AVFilterInOut *open_inputs,
-                         AVFilterInOut *open_outputs, AVClass *log_ctx)
+                         AVFilterInOut *open_outputs, void *log_ctx)
 {
     int index = 0, ret;
     char chr = 0;

From e8876902a9021ec185ca785653067dd34f24c5ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= <clement.boesch@smartjog.com>
Date: Wed, 8 Jun 2011 11:26:43 +0200
Subject: [PATCH 753/830] resample: clarify supported resampling.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This also fixes a crash when 8ch → 2ch resampling is requested.
---
 libavcodec/resample.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/libavcodec/resample.c b/libavcodec/resample.c
index 8c4eebe3c4..82c09fc502 100644
--- a/libavcodec/resample.c
+++ b/libavcodec/resample.c
@@ -180,6 +180,21 @@ static void ac3_5p1_mux(short *output, short *input1, short *input2, int n)
     }
 }
 
+#define SUPPORT_RESAMPLE(ch1, ch2, ch3, ch4, ch5, ch6, ch7, ch8) \
+    ch8<<7 | ch7<<6 | ch6<<5 | ch5<<4 | ch4<<3 | ch3<<2 | ch2<<1 | ch1<<0
+
+static const uint8_t supported_resampling[MAX_CHANNELS] = {
+    //ouput channels:1  2  3  4  5  6  7  8
+    SUPPORT_RESAMPLE(1, 1, 0, 0, 0, 0, 0, 0), // 1 input channel
+    SUPPORT_RESAMPLE(1, 1, 0, 0, 0, 1, 0, 0), // 2 input channels
+    SUPPORT_RESAMPLE(0, 0, 1, 0, 0, 0, 0, 0), // 3 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 1, 0, 0, 0, 0), // 4 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 0, 1, 0, 0, 0), // 5 input channels
+    SUPPORT_RESAMPLE(0, 1, 0, 0, 0, 1, 0, 0), // 6 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 0, 0, 0, 1, 0), // 7 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 0, 0, 0, 0, 1), // 8 input channels
+};
+
 ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
                                         int output_rate, int input_rate,
                                         enum AVSampleFormat sample_fmt_out,
@@ -195,11 +210,15 @@ ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
                MAX_CHANNELS);
         return NULL;
     }
-    if (output_channels > 2 &&
-        !(output_channels == 6 && input_channels == 2) &&
-        output_channels != input_channels) {
-        av_log(NULL, AV_LOG_ERROR,
-               "Resampling output channel count must be 1 or 2 for mono input; 1, 2 or 6 for stereo input; or N for N channel input.\n");
+    if (!(supported_resampling[input_channels-1] & (1<<(output_channels-1)))) {
+        int i;
+        av_log(NULL, AV_LOG_ERROR, "Unsupported audio resampling. Allowed "
+               "output channels for %d input channel%s", input_channels,
+               input_channels > 1 ? "s:" : ":");
+        for (i = 0; i < MAX_CHANNELS; i++)
+            if (supported_resampling[input_channels-1] & (1<<i))
+                av_log(NULL, AV_LOG_ERROR, " %d", i + 1);
+        av_log(NULL, AV_LOG_ERROR, "\n");
         return NULL;
     }
 

From 623ffe8c82c3194e4eff469add76b1478025cd22 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 7 Jun 2011 10:11:21 +0100
Subject: [PATCH 754/830] build: simplify some conditional targets

Use intermediate targets instead of variables for conditional
parts of "all" and "install" targets.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index db1a9bd150..7c7cdaf167 100644
--- a/Makefile
+++ b/Makefile
@@ -41,15 +41,9 @@ FF_LDFLAGS   := $(FFLDFLAGS)
 FF_EXTRALIBS := $(FFEXTRALIBS)
 FF_DEP_LIBS  := $(DEP_LIBS)
 
-ALL_TARGETS-$(CONFIG_DOC)       += documentation
+all-$(CONFIG_DOC): documentation
 
-ifdef PROGS
-INSTALL_TARGETS-yes             += install-progs install-data
-INSTALL_TARGETS-$(CONFIG_DOC)   += install-man
-endif
-INSTALL_PROGS_TARGETS-$(CONFIG_SHARED) = install-libs
-
-all: $(FF_DEP_LIBS) $(PROGS) $(ALL_TARGETS-yes)
+all: $(FF_DEP_LIBS) $(PROGS)
 
 config.h: .config
 .config: $(wildcard $(FFLIBS:%=$(SRC_DIR)/lib%/all*.c))
@@ -127,11 +121,18 @@ doc/%.1: TAG = MAN
 doc/%.1: doc/%.pod
 	$(M)pod2man --section=1 --center=" " --release=" " $< > $@
 
-install: install-libs install-headers $(INSTALL_TARGETS-yes)
+ifdef PROGS
+install: install-progs install-data
+endif
+
+install: install-libs install-headers
 
 install-libs: install-libs-yes
 
-install-progs: $(PROGS) $(INSTALL_PROGS_TARGETS-yes)
+install-progs-$(CONFIG_DOC): install-man
+install-progs-$(CONFIG_SHARED): install-libs
+
+install-progs: install-progs-yes $(PROGS)
 	$(Q)mkdir -p "$(BINDIR)"
 	$(INSTALL) -c -m 755 $(PROGS) "$(BINDIR)"
 

From 151c5841881591552be685b7fd50b488f128ba5b Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 11 Jun 2011 19:54:23 +0100
Subject: [PATCH 755/830] build: fix "make install" with documentation disabled

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index 7c7cdaf167..0df8a11407 100644
--- a/Makefile
+++ b/Makefile
@@ -129,6 +129,7 @@ install: install-libs install-headers
 
 install-libs: install-libs-yes
 
+install-progs-yes:
 install-progs-$(CONFIG_DOC): install-man
 install-progs-$(CONFIG_SHARED): install-libs
 

From 2291e18e4891066bbcbcd1a4e468a59a40e9cb5c Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 11 Jun 2011 13:40:46 +0200
Subject: [PATCH 756/830] Fix --disable-everything

Breakage was due to ff_find_pix_fmt() usage.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 9cab9d7ae4..158014553c 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -18,6 +18,7 @@ OBJS = allcodecs.o                                                      \
        options.o                                                        \
        parser.o                                                         \
        raw.o                                                            \
+       rawdec.o                                                         \
        resample.o                                                       \
        resample2.o                                                      \
        simple_idct.o                                                    \

From 2f2c60400a4e383b6aa7db64d6032a42c2725882 Mon Sep 17 00:00:00 2001
From: "Sven C. Dack" <sven.c.dack@virginmedia.com>
Date: Wed, 30 Mar 2011 17:02:29 +0200
Subject: [PATCH 757/830] x11grab: remove a memory allocation and the
 associated memcpy.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavdevice/x11grab.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c
index c6dc673520..6e77a5c52a 100644
--- a/libavdevice/x11grab.c
+++ b/libavdevice/x11grab.c
@@ -410,10 +410,9 @@ x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
         nanosleep(&ts, NULL);
     }
 
-    if (av_new_packet(pkt, s->frame_size) < 0) {
-        return AVERROR(EIO);
-    }
-
+    av_init_packet(pkt);
+    pkt->data = image->data;
+    pkt->size = s->frame_size;
     pkt->pts = curtime;
 
     if(s->use_shm) {
@@ -430,9 +429,6 @@ x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
         paint_mouse_pointer(image, s);
     }
 
-
-    /* XXX: avoid memcpy */
-    memcpy(pkt->data, image->data, s->frame_size);
     return s->frame_size;
 }
 

From de859358830cfd1daffb1773f6bb069659027321 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sun, 12 Jun 2011 13:54:22 +0200
Subject: [PATCH 758/830] cmdutils: add missing NULL check in parse_options()

Fix ffplay -i FILE, which was recently broken.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 cmdutils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmdutils.c b/cmdutils.c
index b9a5d1b069..c73d5a1ca2 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -277,7 +277,7 @@ unknown_opt:
                 *po->u.int64_arg = parse_number_or_die(opt, arg, OPT_INT64, INT64_MIN, INT64_MAX);
             } else if (po->flags & OPT_FLOAT) {
                 *po->u.float_arg = parse_number_or_die(opt, arg, OPT_FLOAT, -INFINITY, INFINITY);
-            } else {
+            } else if (po->u.func_arg) {
                 if (po->u.func_arg(opt, arg) < 0) {
                     fprintf(stderr, "%s: failed to set value '%s' for option '%s'\n", argv[0], arg, opt);
                     exit(1);

From 225083ac0a2feead04ddd7485b8693dd9c742379 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 12 Jun 2011 17:59:23 +0200
Subject: [PATCH 759/830] mpeg12: replace 2 asserts by av_assert0

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/mpeg12.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 09bed28fef..bd858a59b7 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -30,6 +30,7 @@
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
+#include "libavutil/avassert.h"
 
 #include "mpeg12.h"
 #include "mpeg12data.h"
@@ -2339,7 +2340,7 @@ static int decode_chunks(AVCodecContext *avctx,
             if(s2->pict_type != AV_PICTURE_TYPE_B || avctx->skip_frame <= AVDISCARD_DEFAULT){
                 if(HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_SLICE)){
                     int i;
-                    assert(avctx->thread_count > 1);
+                    av_assert0(avctx->thread_count > 1);
 
                     avctx->execute(avctx, slice_decode_thread,  &s2->thread_context[0], NULL, s->slice_count, sizeof(void*));
                     for(i=0; i<s->slice_count; i++)
@@ -2508,7 +2509,7 @@ static int decode_chunks(AVCodecContext *avctx,
 
                 if(HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_SLICE)){
                     int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count;
-                    assert(avctx->thread_count > 1);
+                    av_assert0(avctx->thread_count > 1);
                     if(threshold <= mb_y){
                         MpegEncContext *thread_context= s2->thread_context[s->slice_count];
 

From 53781bf13e47d15f4d9964001af3894a72c513ee Mon Sep 17 00:00:00 2001
From: Alexander Strange <astrange@ithinksw.com>
Date: Thu, 2 Jun 2011 01:41:38 -0700
Subject: [PATCH 760/830] Update todo

All the known bugs in h264 are fixed so I'm calling it done.
(cherry picked from commit 9cec36a6845c17e90a8d0c2cf9b03a00987c31f0)
---
 mt-work/todo.txt | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/mt-work/todo.txt b/mt-work/todo.txt
index 861013da87..f64514f6fa 100644
--- a/mt-work/todo.txt
+++ b/mt-work/todo.txt
@@ -23,9 +23,6 @@ field pictures in the same packet are not optimal. Modify the
 nals_needed check so that the second field's first slice is
 considered as needed, then uncomment the FIXME code in decode_postinit.
 Ex: http://astrange.ithinksw.net/ffmpeg/mt-samples/PAFF-Chalet-Tire.mp4
-- The code added to shorten frame gaps (to avoid allocating more than 16 new frames)
-appears to be wrong by inspection. It does not handle prev_frame_num > frame_num,
-and "h->frame_num - h->sps.ref_frame_count - 1" should be "h->frame_num - h->sps.ref_frame_count".
 
 mpeg4:
 - Packed B-frames need to be explicitly split up

From 33eac92a3c1aa0f3d0b5bbc6893ed5f646ee527c Mon Sep 17 00:00:00 2001
From: Alexander Strange <astrange@ithinksw.com>
Date: Thu, 2 Jun 2011 01:39:56 -0700
Subject: [PATCH 761/830] h264: Complexify frame num gap shortening code

By observation it did not seem to handle prev_frame_num > frame_num.
This does not affect any files I have.
(cherry picked from commit 43c0092a80f8212cbb783260bafa157f7b85126e)
---
 libavcodec/h264.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index dfe9d4a6a5..7d54e1c6c1 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2133,9 +2133,20 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
 
     if(h0->current_slice == 0){
-        if(h->frame_num != h->prev_frame_num &&
-          (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num) < (h->frame_num - h->sps.ref_frame_count))
-            h->prev_frame_num = h->frame_num - h->sps.ref_frame_count - 1;
+        // Shorten frame num gaps so we don't have to allocate reference frames just to throw them away
+        if(h->frame_num != h->prev_frame_num) {
+            int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num;
+
+            if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
+
+            if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
+                unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
+                if (unwrap_prev_frame_num < 0)
+                    unwrap_prev_frame_num += max_frame_num;
+
+                h->prev_frame_num = unwrap_prev_frame_num;
+            }
+        }
 
         while(h->frame_num !=  h->prev_frame_num &&
               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){

From 86909dd5f7cbc3d2446fad58268553ac06f65e37 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 11 Jun 2011 14:33:09 +0200
Subject: [PATCH 762/830] graphparser: prefer void * over AVClass * for log
 contexts

---
 libavfilter/graphparser.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libavfilter/graphparser.c b/libavfilter/graphparser.c
index b7a2232270..aa1f228f81 100644
--- a/libavfilter/graphparser.c
+++ b/libavfilter/graphparser.c
@@ -36,7 +36,7 @@
  */
 static int link_filter(AVFilterContext *src, int srcpad,
                        AVFilterContext *dst, int dstpad,
-                       AVClass *log_ctx)
+                       void *log_ctx)
 {
     int ret;
     if ((ret = avfilter_link(src, srcpad, dst, dstpad))) {
@@ -55,7 +55,7 @@ static int link_filter(AVFilterContext *src, int srcpad,
  * @return a pointer (that need to be freed after use) to the name
  * between parenthesis
  */
-static char *parse_link_name(const char **buf, AVClass *log_ctx)
+static char *parse_link_name(const char **buf, void *log_ctx)
 {
     const char *start = *buf;
     char *name;
@@ -92,7 +92,7 @@ static char *parse_link_name(const char **buf, AVClass *log_ctx)
  * @return 0 in case of success, a negative AVERROR code otherwise
  */
 static int create_filter(AVFilterContext **filt_ctx, AVFilterGraph *ctx, int index,
-                         const char *filt_name, const char *args, AVClass *log_ctx)
+                         const char *filt_name, const char *args, void *log_ctx)
 {
     AVFilter *filt;
     char inst_name[30];
@@ -151,7 +151,7 @@ static int create_filter(AVFilterContext **filt_ctx, AVFilterGraph *ctx, int ind
  * @return 0 in case of success, a negative AVERROR code otherwise
  */
 static int parse_filter(AVFilterContext **filt_ctx, const char **buf, AVFilterGraph *graph,
-                        int index, AVClass *log_ctx)
+                        int index, void *log_ctx)
 {
     char *opts = NULL;
     char *name = av_get_token(buf, "=,;[\n");
@@ -201,7 +201,7 @@ static void insert_inout(AVFilterInOut **inouts, AVFilterInOut *element)
 
 static int link_filter_inouts(AVFilterContext *filt_ctx,
                               AVFilterInOut **curr_inputs,
-                              AVFilterInOut **open_inputs, AVClass *log_ctx)
+                              AVFilterInOut **open_inputs, void *log_ctx)
 {
     int pad = filt_ctx->input_count, ret;
 
@@ -249,7 +249,7 @@ static int link_filter_inouts(AVFilterContext *filt_ctx,
 }
 
 static int parse_inputs(const char **buf, AVFilterInOut **curr_inputs,
-                        AVFilterInOut **open_outputs, AVClass *log_ctx)
+                        AVFilterInOut **open_outputs, void *log_ctx)
 {
     int pad = 0;
 
@@ -284,7 +284,7 @@ static int parse_inputs(const char **buf, AVFilterInOut **curr_inputs,
 
 static int parse_outputs(const char **buf, AVFilterInOut **curr_inputs,
                          AVFilterInOut **open_inputs,
-                         AVFilterInOut **open_outputs, AVClass *log_ctx)
+                         AVFilterInOut **open_outputs, void *log_ctx)
 {
     int ret, pad = 0;
 

From 6119b23a3662d1e106cdf69ef3171b2e7e1d495c Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 11 Jun 2011 15:16:02 +0200
Subject: [PATCH 763/830] avfiltergraph: change the syntax of
 avfilter_graph_parse()

Make it return the list of open inputs and outputs, so it can be
reused by applications.

Breaks API/ABI.
---
 doc/APIchanges              |  3 +++
 ffmpeg.c                    |  2 +-
 ffplay.c                    |  2 +-
 libavfilter/avfilter.h      |  4 ++--
 libavfilter/avfiltergraph.h |  8 +++++---
 libavfilter/graphparser.c   | 20 ++++++++++----------
 6 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 592c268105..40efee8121 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-12 - xxxxxxx - lavfi 2.16.0 - avfilter_graph_parse()
+  Change avfilter_graph_parse() signature.
+
 2011-06-xx - xxxxxxx - lavu 51.6.0 - opt.h
   Add av_opt_flag_is_set().
 
diff --git a/ffmpeg.c b/ffmpeg.c
index 35395a99a4..f69c054bee 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -418,7 +418,7 @@ static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost)
         inputs->pad_idx = 0;
         inputs->next    = NULL;
 
-        if ((ret = avfilter_graph_parse(ost->graph, ost->avfilter, inputs, outputs, NULL)) < 0)
+        if ((ret = avfilter_graph_parse(ost->graph, ost->avfilter, &inputs, &outputs, NULL)) < 0)
             return ret;
         av_freep(&ost->avfilter);
     } else {
diff --git a/ffplay.c b/ffplay.c
index ebe9e05cba..c5f5f75731 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -1708,7 +1708,7 @@ static int configure_video_filters(AVFilterGraph *graph, VideoState *is, const c
         inputs->pad_idx = 0;
         inputs->next    = NULL;
 
-        if ((ret = avfilter_graph_parse(graph, vfilters, inputs, outputs, NULL)) < 0)
+        if ((ret = avfilter_graph_parse(graph, vfilters, &inputs, &outputs, NULL)) < 0)
             goto the_end;
         av_freep(&vfilters);
     } else {
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 64d76cc0c9..fbd1dc457f 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,8 +26,8 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR 15
-#define LIBAVFILTER_VERSION_MICRO  1
+#define LIBAVFILTER_VERSION_MINOR 16
+#define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \
diff --git a/libavfilter/avfiltergraph.h b/libavfilter/avfiltergraph.h
index 23a7c5138f..538fd2cb95 100644
--- a/libavfilter/avfiltergraph.h
+++ b/libavfilter/avfiltergraph.h
@@ -112,12 +112,14 @@ typedef struct AVFilterInOut {
  *
  * @param graph   the filter graph where to link the parsed graph context
  * @param filters string to be parsed
- * @param inputs  linked list to the inputs of the graph
- * @param outputs linked list to the outputs of the graph
+ * @param inputs  linked list to the inputs of the graph, may be NULL.
+ *                It is updated to contain the list of open inputs after the parsing.
+ * @param outputs linked list to the outputs of the graph, may be NULL.
+ *                It is updated to contain the list of open outputs after the parsing.
  * @return zero on success, a negative AVERROR code on error
  */
 int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
-                         AVFilterInOut *inputs, AVFilterInOut *outputs,
+                         AVFilterInOut **inputs, AVFilterInOut **outputs,
                          void *log_ctx);
 
 #endif /* AVFILTER_AVFILTERGRAPH_H */
diff --git a/libavfilter/graphparser.c b/libavfilter/graphparser.c
index aa1f228f81..ea0c5dda7f 100644
--- a/libavfilter/graphparser.c
+++ b/libavfilter/graphparser.c
@@ -328,8 +328,8 @@ static int parse_outputs(const char **buf, AVFilterInOut **curr_inputs,
 }
 
 int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
-                         AVFilterInOut *open_inputs,
-                         AVFilterInOut *open_outputs, void *log_ctx)
+                         AVFilterInOut **open_inputs, AVFilterInOut **open_outputs,
+                         void *log_ctx)
 {
     int index = 0, ret;
     char chr = 0;
@@ -341,7 +341,7 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
         const char *filterchain = filters;
         filters += strspn(filters, WHITESPACES);
 
-        if ((ret = parse_inputs(&filters, &curr_inputs, &open_outputs, log_ctx)) < 0)
+        if ((ret = parse_inputs(&filters, &curr_inputs, open_outputs, log_ctx)) < 0)
             goto fail;
 
         if ((ret = parse_filter(&filter, &filters, graph, index, log_ctx)) < 0)
@@ -350,14 +350,14 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
         if (filter->input_count == 1 && !curr_inputs && !index) {
             /* First input can be omitted if it is "[in]" */
             const char *tmp = "[in]";
-            if ((ret = parse_inputs(&tmp, &curr_inputs, &open_outputs, log_ctx)) < 0)
+            if ((ret = parse_inputs(&tmp, &curr_inputs, open_outputs, log_ctx)) < 0)
                 goto fail;
         }
 
-        if ((ret = link_filter_inouts(filter, &curr_inputs, &open_inputs, log_ctx)) < 0)
+        if ((ret = link_filter_inouts(filter, &curr_inputs, open_inputs, log_ctx)) < 0)
             goto fail;
 
-        if ((ret = parse_outputs(&filters, &curr_inputs, &open_inputs, &open_outputs,
+        if ((ret = parse_outputs(&filters, &curr_inputs, open_inputs, open_outputs,
                                  log_ctx)) < 0)
             goto fail;
 
@@ -382,10 +382,10 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
         goto fail;
     }
 
-    if (open_inputs && !strcmp(open_inputs->name, "out") && curr_inputs) {
+    if (*open_inputs && !strcmp((*open_inputs)->name, "out") && curr_inputs) {
         /* Last output can be omitted if it is "[out]" */
         const char *tmp = "[out]";
-        if ((ret = parse_outputs(&tmp, &curr_inputs, &open_inputs, &open_outputs,
+        if ((ret = parse_outputs(&tmp, &curr_inputs, open_inputs, open_outputs,
                                  log_ctx)) < 0)
             goto fail;
     }
@@ -396,8 +396,8 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
     for (; graph->filter_count > 0; graph->filter_count--)
         avfilter_free(graph->filters[graph->filter_count - 1]);
     av_freep(&graph->filters);
-    free_inout(open_inputs);
-    free_inout(open_outputs);
+    free_inout(*open_inputs);
+    free_inout(*open_outputs);
     free_inout(curr_inputs);
     return ret;
 }

From c535494268069282cc1147c4d61d4a88ce39e078 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 11 Jun 2011 15:30:46 +0200
Subject: [PATCH 764/830] avfiltergraph: make the AVFilterInOut alloc/free API
 public

This is required to let applications create and destroy
AVFilterInOut structs in a convenient way.
---
 doc/APIchanges              |  3 +++
 ffmpeg.c                    |  4 ++--
 ffplay.c                    |  4 ++--
 libavfilter/avfilter.h      |  2 +-
 libavfilter/avfiltergraph.h | 18 ++++++++++++++++--
 libavfilter/graphparser.c   | 23 ++++++++++++++---------
 6 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 40efee8121..d94c553af9 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-12 - xxxxxxx - lavfi 2.17.0 - avfiltergraph.h
+  Add avfilter_inout_alloc() and avfilter_inout_free() functions.
+
 2011-06-12 - xxxxxxx - lavfi 2.16.0 - avfilter_graph_parse()
   Change avfilter_graph_parse() signature.
 
diff --git a/ffmpeg.c b/ffmpeg.c
index f69c054bee..c51f371d5e 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -405,8 +405,8 @@ static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost)
     ost->graph->scale_sws_opts = av_strdup(args);
 
     if (ost->avfilter) {
-        AVFilterInOut *outputs = av_malloc(sizeof(AVFilterInOut));
-        AVFilterInOut *inputs  = av_malloc(sizeof(AVFilterInOut));
+        AVFilterInOut *outputs = avfilter_inout_alloc();
+        AVFilterInOut *inputs  = avfilter_inout_alloc();
 
         outputs->name    = av_strdup("in");
         outputs->filter_ctx = last_filter;
diff --git a/ffplay.c b/ffplay.c
index c5f5f75731..4145f9c24b 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -1695,8 +1695,8 @@ static int configure_video_filters(AVFilterGraph *graph, VideoState *is, const c
         goto the_end;
 
     if(vfilters) {
-        AVFilterInOut *outputs = av_malloc(sizeof(AVFilterInOut));
-        AVFilterInOut *inputs  = av_malloc(sizeof(AVFilterInOut));
+        AVFilterInOut *outputs = avfilter_inout_alloc();
+        AVFilterInOut *inputs  = avfilter_inout_alloc();
 
         outputs->name    = av_strdup("in");
         outputs->filter_ctx = filt_src;
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index fbd1dc457f..84fa32e64e 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR 16
+#define LIBAVFILTER_VERSION_MINOR 17
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/avfiltergraph.h b/libavfilter/avfiltergraph.h
index 538fd2cb95..a975926fd1 100644
--- a/libavfilter/avfiltergraph.h
+++ b/libavfilter/avfiltergraph.h
@@ -107,15 +107,29 @@ typedef struct AVFilterInOut {
     struct AVFilterInOut *next;
 } AVFilterInOut;
 
+/**
+ * Create an AVFilterInOut.
+ * Must be free with avfilter_inout_free().
+ */
+AVFilterInOut *avfilter_inout_alloc(void);
+
+/**
+ * Free the AVFilterInOut in *inout, and set its pointer to NULL.
+ * If *inout is NULL, do nothing.
+ */
+void avfilter_inout_free(AVFilterInOut **inout);
+
 /**
  * Add a graph described by a string to a graph.
  *
  * @param graph   the filter graph where to link the parsed graph context
  * @param filters string to be parsed
  * @param inputs  linked list to the inputs of the graph, may be NULL.
- *                It is updated to contain the list of open inputs after the parsing.
+ *                It is updated to contain the list of open inputs after the parsing,
+ *                should be freed with avfilter_inout_free().
  * @param outputs linked list to the outputs of the graph, may be NULL.
- *                It is updated to contain the list of open outputs after the parsing.
+ *                It is updated to contain the list of open outputs after the parsing,
+ *                should be freed with avfilter_inout_free().
  * @return zero on success, a negative AVERROR code on error
  */
 int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
diff --git a/libavfilter/graphparser.c b/libavfilter/graphparser.c
index ea0c5dda7f..d62ba8d205 100644
--- a/libavfilter/graphparser.c
+++ b/libavfilter/graphparser.c
@@ -168,13 +168,18 @@ static int parse_filter(AVFilterContext **filt_ctx, const char **buf, AVFilterGr
     return ret;
 }
 
-static void free_inout(AVFilterInOut *head)
+AVFilterInOut *avfilter_inout_alloc(void)
 {
-    while (head) {
-        AVFilterInOut *next = head->next;
-        av_free(head->name);
-        av_free(head);
-        head = next;
+    return av_mallocz(sizeof(AVFilterInOut));
+}
+
+void avfilter_inout_free(AVFilterInOut **inout)
+{
+    while (*inout) {
+        AVFilterInOut *next = (*inout)->next;
+        av_freep(&(*inout)->name);
+        av_freep(inout);
+        *inout = next;
     }
 }
 
@@ -396,8 +401,8 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
     for (; graph->filter_count > 0; graph->filter_count--)
         avfilter_free(graph->filters[graph->filter_count - 1]);
     av_freep(&graph->filters);
-    free_inout(*open_inputs);
-    free_inout(*open_outputs);
-    free_inout(curr_inputs);
+    avfilter_inout_free(open_inputs);
+    avfilter_inout_free(open_outputs);
+    avfilter_inout_free(&curr_inputs);
     return ret;
 }

From 3fe6bbd5dcdf603a8dac78b48adf6f0b6604143c Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Sat, 11 Jun 2011 18:21:53 +0200
Subject: [PATCH 765/830] libavfilter: implement
 avfilter_fill_frame_from_video_buffer_ref()

---
 doc/APIchanges         |  4 ++++
 libavfilter/avcodec.c  | 18 ++++++++++++++++++
 libavfilter/avcodec.h  | 11 +++++++++++
 libavfilter/avfilter.h |  2 +-
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index d94c553af9..e8dc0642fa 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-06-12 - xxxxxxx - lavfi 2.18.0 - avcodec.h
+  Add avfilter_get_video_buffer_ref_from_frame() function in
+  libavfilter/avcodec.h.
+
 2011-06-12 - xxxxxxx - lavfi 2.17.0 - avfiltergraph.h
   Add avfilter_inout_alloc() and avfilter_inout_free() functions.
 
diff --git a/libavfilter/avcodec.c b/libavfilter/avcodec.c
index 50670bc55e..2010040d14 100644
--- a/libavfilter/avcodec.c
+++ b/libavfilter/avcodec.c
@@ -53,3 +53,21 @@ AVFilterBufferRef *avfilter_get_video_buffer_ref_from_frame(const AVFrame *frame
     avfilter_copy_frame_props(picref, frame);
     return picref;
 }
+
+int avfilter_fill_frame_from_video_buffer_ref(AVFrame *frame,
+                                              const AVFilterBufferRef *picref)
+{
+    if (!picref || !picref->video || !frame)
+        return AVERROR(EINVAL);
+
+    memcpy(frame->data,     picref->data,     sizeof(frame->data));
+    memcpy(frame->linesize, picref->linesize, sizeof(frame->linesize));
+    frame->pkt_pos          = picref->pos;
+    frame->interlaced_frame = picref->video->interlaced;
+    frame->top_field_first  = picref->video->top_field_first;
+    frame->key_frame        = picref->video->key_frame;
+    frame->pict_type        = picref->video->pict_type;
+    frame->sample_aspect_ratio = picref->video->sample_aspect_ratio;
+
+    return 0;
+}
diff --git a/libavfilter/avcodec.h b/libavfilter/avcodec.h
index 4eed6b2d2c..dec5ae4a7c 100644
--- a/libavfilter/avcodec.h
+++ b/libavfilter/avcodec.h
@@ -46,6 +46,17 @@ void avfilter_copy_frame_props(AVFilterBufferRef *dst, const AVFrame *src);
  */
 AVFilterBufferRef *avfilter_get_video_buffer_ref_from_frame(const AVFrame *frame, int perms);
 
+/**
+ * Fill an AVFrame with the information stored in picref.
+ *
+ * @param frame an already allocated AVFrame
+ * @param picref a video buffer reference
+ * @return 0 in case of success, a negative AVERROR code in case of
+ * failure
+ */
+int avfilter_fill_frame_from_video_buffer_ref(AVFrame *frame,
+                                              const AVFilterBufferRef *picref);
+
 /**
  * Add frame data to buffer_src.
  *
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 84fa32e64e..7628cd51ec 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -26,7 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR 17
+#define LIBAVFILTER_VERSION_MINOR 18
 #define LIBAVFILTER_VERSION_MICRO  0
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \

From f0e9ee8d9491898bec012f1295d83c6efa937c32 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 13 Jun 2011 03:00:53 +0200
Subject: [PATCH 766/830] Revert "H264: Split out hl_motion and template it,
 this seems a bit faster"

This reverts commit a50f0bea25a3da605cd547fe3bdfd36c8764b847.

This has been implemented differently in qatar and it's better they
maintain it for me instead of me having to spend an average 5sec more
per merge.

Conflicts:

	libavcodec/h264.c

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/Makefile         |   6 +-
 libavcodec/h264.c           | 285 +++++++++++++++++++++++++++++++++++-
 libavcodec/h264.h           |   6 -
 libavcodec/h264_hl_motion.c | 164 ---------------------
 libavcodec/h264_hl_motion.h | 282 -----------------------------------
 5 files changed, 287 insertions(+), 456 deletions(-)
 delete mode 100644 libavcodec/h264_hl_motion.c
 delete mode 100644 libavcodec/h264_hl_motion.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 158014553c..aa091bf2e5 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -177,7 +177,7 @@ OBJS-$(CONFIG_H263_ENCODER)            += mpegvideo_enc.o mpeg4video.o      \
                                           ratecontrol.o h263.o ituh263enc.o \
                                           flvenc.o mpeg12data.o             \
                                           mpegvideo.o error_resilience.o
-OBJS-$(CONFIG_H264_DECODER)            += h264.o h264_hl_motion.o              \
+OBJS-$(CONFIG_H264_DECODER)            += h264.o                               \
                                           h264_loopfilter.o h264_direct.o      \
                                           cabac.o h264_sei.o h264_ps.o         \
                                           h264_refs.o h264_cavlc.o h264_cabac.o\
@@ -368,7 +368,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o    \
                                           mpegvideo.o error_resilience.o \
                                           ituh263enc.o mpegvideo_enc.o   \
                                           ratecontrol.o mpeg12data.o
-OBJS-$(CONFIG_SVQ3_DECODER)            += h264.o svq3.o h264_hl_motion.o      \
+OBJS-$(CONFIG_SVQ3_DECODER)            += h264.o svq3.o                       \
                                           h264_loopfilter.o h264_direct.o     \
                                           h264_sei.o h264_ps.o h264_refs.o    \
                                           h264_cavlc.o h264_cabac.o cabac.o   \
@@ -612,7 +612,7 @@ OBJS-$(CONFIG_FLAC_PARSER)             += flac_parser.o flacdata.o flac.o \
                                           vorbis_data.o
 OBJS-$(CONFIG_H261_PARSER)             += h261_parser.o
 OBJS-$(CONFIG_H263_PARSER)             += h263_parser.o
-OBJS-$(CONFIG_H264_PARSER)             += h264_parser.o h264.o h264_hl_motion.o \
+OBJS-$(CONFIG_H264_PARSER)             += h264_parser.o h264.o            \
                                           cabac.o                         \
                                           h264_refs.o h264_sei.o h264_direct.o \
                                           h264_loopfilter.o h264_cabac.o \
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 7d54e1c6c1..a4619e607a 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -447,6 +447,289 @@ static void chroma_dc_dct_c(DCTELEM *block){
 }
 #endif
 
+static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
+                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                           int src_x_offset, int src_y_offset,
+                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
+    MpegEncContext * const s = &h->s;
+    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
+    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
+    const int luma_xy= (mx&3) + ((my&3)<<2);
+    uint8_t * src_y = pic->data[0] + ((mx>>2)<<h->pixel_shift) + (my>>2)*h->mb_linesize;
+    uint8_t * src_cb, * src_cr;
+    int extra_width= h->emu_edge_width;
+    int extra_height= h->emu_edge_height;
+    int emu=0;
+    const int full_mx= mx>>2;
+    const int full_my= my>>2;
+    const int pic_width  = 16*s->mb_width;
+    const int pic_height = 16*s->mb_height >> MB_FIELD;
+
+    if(mx&7) extra_width -= 3;
+    if(my&7) extra_height -= 3;
+
+    if(   full_mx < 0-extra_width
+       || full_my < 0-extra_height
+       || full_mx + 16/*FIXME*/ > pic_width + extra_width
+       || full_my + 16/*FIXME*/ > pic_height + extra_height){
+        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2<<h->pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+            src_y= s->edge_emu_buffer + (2<<h->pixel_shift) + 2*h->mb_linesize;
+        emu=1;
+    }
+
+    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
+    if(!square){
+        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
+    }
+
+    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
+
+    if(MB_FIELD){
+        // chroma offset when predicting from a field of opposite parity
+        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
+        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
+    }
+    src_cb= pic->data[1] + ((mx>>3)<<h->pixel_shift) + (my>>3)*h->mb_uvlinesize;
+    src_cr= pic->data[2] + ((mx>>3)<<h->pixel_shift) + (my>>3)*h->mb_uvlinesize;
+
+    if(emu){
+        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
+            src_cb= s->edge_emu_buffer;
+    }
+    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
+
+    if(emu){
+        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
+            src_cr= s->edge_emu_buffer;
+    }
+    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
+}
+
+static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
+                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                           int x_offset, int y_offset,
+                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
+                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
+                           int list0, int list1){
+    MpegEncContext * const s = &h->s;
+    qpel_mc_func *qpix_op=  qpix_put;
+    h264_chroma_mc_func chroma_op= chroma_put;
+
+    dest_y  += (2*x_offset<<h->pixel_shift) + 2*y_offset*h->  mb_linesize;
+    dest_cb += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_cr += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
+    x_offset += 8*s->mb_x;
+    y_offset += 8*(s->mb_y >> MB_FIELD);
+
+    if(list0){
+        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
+        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
+                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                           qpix_op, chroma_op);
+
+        qpix_op=  qpix_avg;
+        chroma_op= chroma_avg;
+    }
+
+    if(list1){
+        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
+        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
+                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                           qpix_op, chroma_op);
+    }
+}
+
+static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
+                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                           int x_offset, int y_offset,
+                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
+                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
+                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
+                           int list0, int list1){
+    MpegEncContext * const s = &h->s;
+
+    dest_y  += (2*x_offset<<h->pixel_shift) + 2*y_offset*h->  mb_linesize;
+    dest_cb += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_cr += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
+    x_offset += 8*s->mb_x;
+    y_offset += 8*(s->mb_y >> MB_FIELD);
+
+    if(list0 && list1){
+        /* don't optimize for luma-only case, since B-frames usually
+         * use implicit weights => chroma too. */
+        uint8_t *tmp_cb = s->obmc_scratchpad;
+        uint8_t *tmp_cr = s->obmc_scratchpad + (8<<h->pixel_shift);
+        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
+        int refn0 = h->ref_cache[0][ scan8[n] ];
+        int refn1 = h->ref_cache[1][ scan8[n] ];
+
+        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
+                    dest_y, dest_cb, dest_cr,
+                    x_offset, y_offset, qpix_put, chroma_put);
+        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
+                    tmp_y, tmp_cb, tmp_cr,
+                    x_offset, y_offset, qpix_put, chroma_put);
+
+        if(h->use_weight == 2){
+            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
+            int weight1 = 64 - weight0;
+            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
+            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
+            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
+        }else{
+            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
+                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
+                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
+            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
+                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
+            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
+                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
+        }
+    }else{
+        int list = list1 ? 1 : 0;
+        int refn = h->ref_cache[list][ scan8[n] ];
+        Picture *ref= &h->ref_list[list][refn];
+        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
+                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_put, chroma_put);
+
+        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
+                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
+        if(h->use_weight_chroma){
+            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
+            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
+        }
+    }
+}
+
+static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
+                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                           int x_offset, int y_offset,
+                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
+                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
+                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
+                           int list0, int list1){
+    if((h->use_weight==2 && list0 && list1
+        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
+       || h->use_weight==1)
+        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
+                         x_offset, y_offset, qpix_put, chroma_put,
+                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
+    else
+        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
+                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
+}
+
+static inline void prefetch_motion(H264Context *h, int list){
+    /* fetch pixels for estimated mv 4 macroblocks ahead
+     * optimized for 64byte cache lines */
+    MpegEncContext * const s = &h->s;
+    const int refn = h->ref_cache[list][scan8[0]];
+    if(refn >= 0){
+        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
+        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
+        uint8_t **src= h->ref_list[list][refn].data;
+        int off= ((mx+64)<<h->pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize;
+        s->dsp.prefetch(src[0]+off, s->linesize, 4);
+        off= (((mx>>1)+64)<<h->pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
+        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
+    }
+}
+
+static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
+                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
+                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= h->mb_xy;
+    const int mb_type= s->current_picture.mb_type[mb_xy];
+
+    assert(IS_INTER(mb_type));
+
+    if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME)
+        await_references(h);
+    prefetch_motion(h, 0);
+
+    if(IS_16X16(mb_type)){
+        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
+                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
+                weight_op, weight_avg,
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
+    }else if(IS_16X8(mb_type)){
+        mc_part(h, 0, 0, 4, (8<<h->pixel_shift), dest_y, dest_cb, dest_cr, 0, 0,
+                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
+                &weight_op[1], &weight_avg[1],
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
+        mc_part(h, 8, 0, 4, (8<<h->pixel_shift), dest_y, dest_cb, dest_cr, 0, 4,
+                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
+                &weight_op[1], &weight_avg[1],
+                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
+    }else if(IS_8X16(mb_type)){
+        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
+                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
+                &weight_op[2], &weight_avg[2],
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
+        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
+                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
+                &weight_op[2], &weight_avg[2],
+                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
+    }else{
+        int i;
+
+        assert(IS_8X8(mb_type));
+
+        for(i=0; i<4; i++){
+            const int sub_mb_type= h->sub_mb_type[i];
+            const int n= 4*i;
+            int x_offset= (i&1)<<2;
+            int y_offset= (i&2)<<1;
+
+            if(IS_SUB_8X8(sub_mb_type)){
+                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
+                    &weight_op[3], &weight_avg[3],
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+            }else if(IS_SUB_8X4(sub_mb_type)){
+                mc_part(h, n  , 0, 2, (4<<h->pixel_shift), dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
+                    &weight_op[4], &weight_avg[4],
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                mc_part(h, n+2, 0, 2, (4<<h->pixel_shift), dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
+                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
+                    &weight_op[4], &weight_avg[4],
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+            }else if(IS_SUB_4X8(sub_mb_type)){
+                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
+                    &weight_op[5], &weight_avg[5],
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
+                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
+                    &weight_op[5], &weight_avg[5],
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+            }else{
+                int j;
+                assert(IS_SUB_4X4(sub_mb_type));
+                for(j=0; j<4; j++){
+                    int sub_x_offset= x_offset + 2*(j&1);
+                    int sub_y_offset= y_offset +   (j&2);
+                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
+                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
+                        &weight_op[6], &weight_avg[6],
+                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                }
+            }
+        }
+    }
+
+    prefetch_motion(h, 1);
+}
+
+
 static void free_tables(H264Context *h, int free_rbsp){
     int i;
     H264Context *hx;
@@ -1425,7 +1708,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
             if(h->deblocking_filter)
                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
         }else if(is_h264){
-            ff_hl_motion(h, dest_y, dest_cb, dest_cr,
+            hl_motion(h, dest_y, dest_cb, dest_cr,
                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                       h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 04da701750..c075c4b48e 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -710,12 +710,6 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
 void ff_h264_reset_sei(H264Context *h);
 
 
-void ff_hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
-                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
-                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
-                      h264_weight_func *weight_op, h264_biweight_func *weight_avg);
-
-
 /*
 o-o o-o
  / / /
diff --git a/libavcodec/h264_hl_motion.c b/libavcodec/h264_hl_motion.c
deleted file mode 100644
index 654b8b8444..0000000000
--- a/libavcodec/h264_hl_motion.c
+++ /dev/null
@@ -1,164 +0,0 @@
-
-#include "h264.h"
-#include "thread.h"
-
-static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
-                                 int y_offset, int list){
-    int raw_my= h->mv_cache[list][ scan8[n] ][1];
-    int filter_height= (raw_my&3) ? 2 : 0;
-    int full_my= (raw_my>>2) + y_offset;
-    int top = full_my - filter_height, bottom = full_my + height + filter_height;
-
-    return FFMAX(abs(top), bottom);
-}
-
-static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
-                               int y_offset, int list0, int list1, int *nrefs){
-    MpegEncContext * const s = &h->s;
-    int my;
-
-    y_offset += 16*(s->mb_y >> MB_FIELD);
-
-    if(list0){
-        int ref_n = h->ref_cache[0][ scan8[n] ];
-        Picture *ref= &h->ref_list[0][ref_n];
-
-        // Error resilience puts the current picture in the ref list.
-        // Don't try to wait on these as it will cause a deadlock.
-        // Fields can wait on each other, though.
-        if(ref->thread_opaque != s->current_picture.thread_opaque ||
-           (ref->reference&3) != s->picture_structure) {
-            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
-            if (refs[0][ref_n] < 0) nrefs[0] += 1;
-            refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
-        }
-    }
-
-    if(list1){
-        int ref_n = h->ref_cache[1][ scan8[n] ];
-        Picture *ref= &h->ref_list[1][ref_n];
-
-        if(ref->thread_opaque != s->current_picture.thread_opaque ||
-           (ref->reference&3) != s->picture_structure) {
-            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
-            if (refs[1][ref_n] < 0) nrefs[1] += 1;
-            refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
-        }
-    }
-}
-
-/**
- * Wait until all reference frames are available for MC operations.
- *
- * @param h the H264 context
- */
-static void await_references(H264Context *h){
-    MpegEncContext * const s = &h->s;
-    const int mb_xy= h->mb_xy;
-    const int mb_type= s->current_picture.mb_type[mb_xy];
-    int refs[2][48];
-    int nrefs[2] = {0};
-    int ref, list;
-
-    memset(refs, -1, sizeof(refs));
-
-    if(IS_16X16(mb_type)){
-        get_lowest_part_y(h, refs, 0, 16, 0,
-                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
-    }else if(IS_16X8(mb_type)){
-        get_lowest_part_y(h, refs, 0, 8, 0,
-                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
-        get_lowest_part_y(h, refs, 8, 8, 8,
-                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
-    }else if(IS_8X16(mb_type)){
-        get_lowest_part_y(h, refs, 0, 16, 0,
-                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
-        get_lowest_part_y(h, refs, 4, 16, 0,
-                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
-    }else{
-        int i;
-
-        assert(IS_8X8(mb_type));
-
-        for(i=0; i<4; i++){
-            const int sub_mb_type= h->sub_mb_type[i];
-            const int n= 4*i;
-            int y_offset= (i&2)<<2;
-
-            if(IS_SUB_8X8(sub_mb_type)){
-                get_lowest_part_y(h, refs, n  , 8, y_offset,
-                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
-            }else if(IS_SUB_8X4(sub_mb_type)){
-                get_lowest_part_y(h, refs, n  , 4, y_offset,
-                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
-                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
-                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
-            }else if(IS_SUB_4X8(sub_mb_type)){
-                get_lowest_part_y(h, refs, n  , 8, y_offset,
-                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
-                get_lowest_part_y(h, refs, n+1, 8, y_offset,
-                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
-            }else{
-                int j;
-                assert(IS_SUB_4X4(sub_mb_type));
-                for(j=0; j<4; j++){
-                    int sub_y_offset= y_offset + 2*(j&2);
-                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
-                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
-                }
-            }
-        }
-    }
-
-    for(list=h->list_count-1; list>=0; list--){
-        for(ref=0; ref<48 && nrefs[list]; ref++){
-            int row = refs[list][ref];
-            if(row >= 0){
-                Picture *ref_pic = &h->ref_list[list][ref];
-                int ref_field = ref_pic->reference - 1;
-                int ref_field_picture = ref_pic->field_picture;
-                int pic_height = 16*s->mb_height >> ref_field_picture;
-
-                row <<= MB_MBAFF;
-                nrefs[list]--;
-
-                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
-                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
-                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
-                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
-                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
-                }else if(FIELD_PICTURE){
-                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
-                }else{
-                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
-                }
-            }
-        }
-    }
-}
-
-#define FUNC(a) a ## _8
-#define PIXEL_SHIFT 0
-#include "h264_hl_motion.h"
-
-#undef PIXEL_SHIFT
-#undef FUNC
-#define FUNC(a) a ## _16
-#define PIXEL_SHIFT 1
-#include "h264_hl_motion.h"
-
-void ff_hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
-                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
-                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
-                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
-    if(h->pixel_shift){
-        hl_motion_16(h, dest_y, dest_cb, dest_cr,
-                      qpix_put, chroma_put,
-                      qpix_avg, chroma_avg,
-                      weight_op, weight_avg);
-    }else
-        hl_motion_8(h, dest_y, dest_cb, dest_cr,
-                      qpix_put, chroma_put,
-                      qpix_avg, chroma_avg,
-                      weight_op, weight_avg);
-}
diff --git a/libavcodec/h264_hl_motion.h b/libavcodec/h264_hl_motion.h
deleted file mode 100644
index f354251fc9..0000000000
--- a/libavcodec/h264_hl_motion.h
+++ /dev/null
@@ -1,282 +0,0 @@
-
-static inline void FUNC(mc_dir_part)(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
-                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
-                           int src_x_offset, int src_y_offset,
-                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
-    MpegEncContext * const s = &h->s;
-    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
-    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
-    const int luma_xy= (mx&3) + ((my&3)<<2);
-    uint8_t * src_y = pic->data[0] + ((mx>>2)<<PIXEL_SHIFT) + (my>>2)*h->mb_linesize;
-    uint8_t * src_cb, * src_cr;
-    int extra_width= h->emu_edge_width;
-    int extra_height= h->emu_edge_height;
-    int emu=0;
-    const int full_mx= mx>>2;
-    const int full_my= my>>2;
-    const int pic_width  = 16*s->mb_width;
-    const int pic_height = 16*s->mb_height >> MB_FIELD;
-
-    if(mx&7) extra_width -= 3;
-    if(my&7) extra_height -= 3;
-
-    if(   full_mx < 0-extra_width
-       || full_my < 0-extra_height
-       || full_mx + 16/*FIXME*/ > pic_width + extra_width
-       || full_my + 16/*FIXME*/ > pic_height + extra_height){
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2<<PIXEL_SHIFT) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
-            src_y= s->edge_emu_buffer + (2<<PIXEL_SHIFT) + 2*h->mb_linesize;
-        emu=1;
-    }
-
-    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
-    if(!square){
-        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
-    }
-
-    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
-
-    if(MB_FIELD){
-        // chroma offset when predicting from a field of opposite parity
-        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
-        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
-    }
-    src_cb= pic->data[1] + ((mx>>3)<<PIXEL_SHIFT) + (my>>3)*h->mb_uvlinesize;
-    src_cr= pic->data[2] + ((mx>>3)<<PIXEL_SHIFT) + (my>>3)*h->mb_uvlinesize;
-
-    if(emu){
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
-            src_cb= s->edge_emu_buffer;
-    }
-    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
-
-    if(emu){
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
-            src_cr= s->edge_emu_buffer;
-    }
-    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
-}
-
-static inline void FUNC(mc_part_std)(H264Context *h, int n, int square, int chroma_height, int delta,
-                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
-                           int x_offset, int y_offset,
-                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
-                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
-                           int list0, int list1){
-    MpegEncContext * const s = &h->s;
-    qpel_mc_func *qpix_op=  qpix_put;
-    h264_chroma_mc_func chroma_op= chroma_put;
-
-    dest_y  += (2*x_offset<<PIXEL_SHIFT) + 2*y_offset*h->  mb_linesize;
-    dest_cb += (  x_offset<<PIXEL_SHIFT) +   y_offset*h->mb_uvlinesize;
-    dest_cr += (  x_offset<<PIXEL_SHIFT) +   y_offset*h->mb_uvlinesize;
-    x_offset += 8*s->mb_x;
-    y_offset += 8*(s->mb_y >> MB_FIELD);
-
-    if(list0){
-        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
-        FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, 0,
-                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op);
-
-        qpix_op=  qpix_avg;
-        chroma_op= chroma_avg;
-    }
-
-    if(list1){
-        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
-        FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, 1,
-                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op);
-    }
-}
-
-static inline void FUNC(mc_part_weighted)(H264Context *h, int n, int square, int chroma_height, int delta,
-                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
-                           int x_offset, int y_offset,
-                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
-                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
-                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
-                           int list0, int list1){
-    MpegEncContext * const s = &h->s;
-
-    dest_y  += (2*x_offset<<PIXEL_SHIFT) + 2*y_offset*h->  mb_linesize;
-    dest_cb += (  x_offset<<PIXEL_SHIFT) +   y_offset*h->mb_uvlinesize;
-    dest_cr += (  x_offset<<PIXEL_SHIFT) +   y_offset*h->mb_uvlinesize;
-    x_offset += 8*s->mb_x;
-    y_offset += 8*(s->mb_y >> MB_FIELD);
-
-    if(list0 && list1){
-        /* don't optimize for luma-only case, since B-frames usually
-         * use implicit weights => chroma too. */
-        uint8_t *tmp_cb = s->obmc_scratchpad;
-        uint8_t *tmp_cr = s->obmc_scratchpad + (8<<PIXEL_SHIFT);
-        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
-        int refn0 = h->ref_cache[0][ scan8[n] ];
-        int refn1 = h->ref_cache[1][ scan8[n] ];
-
-        FUNC(mc_dir_part)(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
-                    dest_y, dest_cb, dest_cr,
-                    x_offset, y_offset, qpix_put, chroma_put);
-        FUNC(mc_dir_part)(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
-                    tmp_y, tmp_cb, tmp_cr,
-                    x_offset, y_offset, qpix_put, chroma_put);
-
-        if(h->use_weight == 2){
-            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
-            int weight1 = 64 - weight0;
-            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
-            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
-            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
-        }else{
-            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
-                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
-                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
-            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
-                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
-            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
-                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
-        }
-    }else{
-        int list = list1 ? 1 : 0;
-        int refn = h->ref_cache[list][ scan8[n] ];
-        Picture *ref= &h->ref_list[list][refn];
-        FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, list,
-                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                    qpix_put, chroma_put);
-
-        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
-                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
-        if(h->use_weight_chroma){
-            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
-            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
-        }
-    }
-}
-
-static inline void FUNC(mc_part)(H264Context *h, int n, int square, int chroma_height, int delta,
-                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
-                           int x_offset, int y_offset,
-                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
-                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
-                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
-                           int list0, int list1){
-    if((h->use_weight==2 && list0 && list1
-        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
-       || h->use_weight==1)
-        FUNC(mc_part_weighted)(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
-                         x_offset, y_offset, qpix_put, chroma_put,
-                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
-    else
-        FUNC(mc_part_std)(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
-                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
-}
-
-static inline void FUNC(prefetch_motion)(H264Context *h, int list){
-    /* fetch pixels for estimated mv 4 macroblocks ahead
-     * optimized for 64byte cache lines */
-    MpegEncContext * const s = &h->s;
-    const int refn = h->ref_cache[list][scan8[0]];
-    if(refn >= 0){
-        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
-        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
-        uint8_t **src= h->ref_list[list][refn].data;
-        int off= ((mx+64)<<PIXEL_SHIFT) + (my + (s->mb_x&3)*4)*h->mb_linesize;
-        s->dsp.prefetch(src[0]+off, s->linesize, 4);
-        off= (((mx>>1)+64)<<PIXEL_SHIFT) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
-        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
-    }
-}
-
-static void FUNC(hl_motion)(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
-                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
-                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
-                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
-    MpegEncContext * const s = &h->s;
-    const int mb_xy= h->mb_xy;
-    const int mb_type= s->current_picture.mb_type[mb_xy];
-
-    assert(IS_INTER(mb_type));
-
-    if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME)
-        await_references(h);
-    FUNC(prefetch_motion)(h, 0);
-
-    if(IS_16X16(mb_type)){
-        FUNC(mc_part)(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
-                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
-                weight_op, weight_avg,
-                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
-    }else if(IS_16X8(mb_type)){
-        FUNC(mc_part)(h, 0, 0, 4, (8<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, 0, 0,
-                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
-                &weight_op[1], &weight_avg[1],
-                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
-        FUNC(mc_part)(h, 8, 0, 4, (8<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, 0, 4,
-                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
-                &weight_op[1], &weight_avg[1],
-                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
-    }else if(IS_8X16(mb_type)){
-        FUNC(mc_part)(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
-                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
-                &weight_op[2], &weight_avg[2],
-                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
-        FUNC(mc_part)(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
-                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
-                &weight_op[2], &weight_avg[2],
-                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
-    }else{
-        int i;
-
-        assert(IS_8X8(mb_type));
-
-        for(i=0; i<4; i++){
-            const int sub_mb_type= h->sub_mb_type[i];
-            const int n= 4*i;
-            int x_offset= (i&1)<<2;
-            int y_offset= (i&2)<<1;
-
-            if(IS_SUB_8X8(sub_mb_type)){
-                FUNC(mc_part)(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
-                    &weight_op[3], &weight_avg[3],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-            }else if(IS_SUB_8X4(sub_mb_type)){
-                FUNC(mc_part)(h, n  , 0, 2, (4<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
-                    &weight_op[4], &weight_avg[4],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-                FUNC(mc_part)(h, n+2, 0, 2, (4<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
-                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
-                    &weight_op[4], &weight_avg[4],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-            }else if(IS_SUB_4X8(sub_mb_type)){
-                FUNC(mc_part)(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
-                    &weight_op[5], &weight_avg[5],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-                FUNC(mc_part)(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
-                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
-                    &weight_op[5], &weight_avg[5],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-            }else{
-                int j;
-                assert(IS_SUB_4X4(sub_mb_type));
-                for(j=0; j<4; j++){
-                    int sub_x_offset= x_offset + 2*(j&1);
-                    int sub_y_offset= y_offset +   (j&2);
-                    FUNC(mc_part)(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
-                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
-                        &weight_op[6], &weight_avg[6],
-                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-                }
-            }
-        }
-    }
-
-    FUNC(prefetch_motion)(h, 1);
-}

From 21d5de930360bb29482aabd869074b9fd7b74ce3 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 13 Jun 2011 03:30:56 +0200
Subject: [PATCH 767/830] H264: Reduce pointless diffs to qatar

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/h264.c | 137 +++++++++++++++++++++++++++++-----------------
 1 file changed, 87 insertions(+), 50 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index a4619e607a..4da2807663 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -450,12 +450,13 @@ static void chroma_dc_dct_c(DCTELEM *block){
 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int src_x_offset, int src_y_offset,
-                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
+                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
+                           int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
     const int luma_xy= (mx&3) + ((my&3)<<2);
-    uint8_t * src_y = pic->data[0] + ((mx>>2)<<h->pixel_shift) + (my>>2)*h->mb_linesize;
+    uint8_t * src_y = pic->data[0] + ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
     uint8_t * src_cb, * src_cr;
     int extra_width= h->emu_edge_width;
     int extra_height= h->emu_edge_height;
@@ -472,8 +473,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
        || full_my < 0-extra_height
        || full_mx + 16/*FIXME*/ > pic_width + extra_width
        || full_my + 16/*FIXME*/ > pic_height + extra_height){
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2<<h->pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
-            src_y= s->edge_emu_buffer + (2<<h->pixel_shift) + 2*h->mb_linesize;
+        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+            src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
         emu=1;
     }
 
@@ -489,8 +490,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
     }
-    src_cb= pic->data[1] + ((mx>>3)<<h->pixel_shift) + (my>>3)*h->mb_uvlinesize;
-    src_cr= pic->data[2] + ((mx>>3)<<h->pixel_shift) + (my>>3)*h->mb_uvlinesize;
+    src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
+    src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
 
     if(emu){
         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
@@ -510,14 +511,14 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
                            int x_offset, int y_offset,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
-                           int list0, int list1){
+                           int list0, int list1, int pixel_shift){
     MpegEncContext * const s = &h->s;
     qpel_mc_func *qpix_op=  qpix_put;
     h264_chroma_mc_func chroma_op= chroma_put;
 
-    dest_y  += (2*x_offset<<h->pixel_shift) + 2*y_offset*h->  mb_linesize;
-    dest_cb += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
-    dest_cr += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->  mb_linesize;
+    dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -525,7 +526,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op);
+                           qpix_op, chroma_op, pixel_shift);
 
         qpix_op=  qpix_avg;
         chroma_op= chroma_avg;
@@ -535,7 +536,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op);
+                           qpix_op, chroma_op, pixel_shift);
     }
 }
 
@@ -545,12 +546,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
-                           int list0, int list1){
+                           int list0, int list1, int pixel_shift){
     MpegEncContext * const s = &h->s;
 
-    dest_y  += (2*x_offset<<h->pixel_shift) + 2*y_offset*h->  mb_linesize;
-    dest_cb += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
-    dest_cr += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->  mb_linesize;
+    dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -558,17 +559,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         /* don't optimize for luma-only case, since B-frames usually
          * use implicit weights => chroma too. */
         uint8_t *tmp_cb = s->obmc_scratchpad;
-        uint8_t *tmp_cr = s->obmc_scratchpad + (8<<h->pixel_shift);
+        uint8_t *tmp_cr = s->obmc_scratchpad + (8 << pixel_shift);
         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
         int refn0 = h->ref_cache[0][ scan8[n] ];
         int refn1 = h->ref_cache[1][ scan8[n] ];
 
         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                     dest_y, dest_cb, dest_cr,
-                    x_offset, y_offset, qpix_put, chroma_put);
+                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                     tmp_y, tmp_cb, tmp_cr,
-                    x_offset, y_offset, qpix_put, chroma_put);
+                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
 
         if(h->use_weight == 2){
             int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
@@ -593,7 +594,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         Picture *ref= &h->ref_list[list][refn];
         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                    qpix_put, chroma_put);
+                    qpix_put, chroma_put, pixel_shift);
 
         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                        h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
@@ -612,19 +613,21 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
-                           int list0, int list1){
+                           int list0, int list1, int pixel_shift){
     if((h->use_weight==2 && list0 && list1
         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
        || h->use_weight==1)
         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                          x_offset, y_offset, qpix_put, chroma_put,
-                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
+                         weight_op[0], weight_op[3], weight_avg[0],
+                         weight_avg[3], list0, list1, pixel_shift);
     else
         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
-                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
+                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
+                    chroma_avg, list0, list1, pixel_shift);
 }
 
-static inline void prefetch_motion(H264Context *h, int list){
+static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){
     /* fetch pixels for estimated mv 4 macroblocks ahead
      * optimized for 64byte cache lines */
     MpegEncContext * const s = &h->s;
@@ -640,43 +643,49 @@ static inline void prefetch_motion(H264Context *h, int list){
     }
 }
 
-static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
-                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
+                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
+                      int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
 
     assert(IS_INTER(mb_type));
 
-    if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME)
+    if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
         await_references(h);
-    prefetch_motion(h, 0);
+    prefetch_motion(h, 0, pixel_shift);
 
     if(IS_16X16(mb_type)){
         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                 weight_op, weight_avg,
-                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
+                pixel_shift);
     }else if(IS_16X8(mb_type)){
-        mc_part(h, 0, 0, 4, (8<<h->pixel_shift), dest_y, dest_cb, dest_cr, 0, 0,
+        mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
-                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
-        mc_part(h, 8, 0, 4, (8<<h->pixel_shift), dest_y, dest_cb, dest_cr, 0, 4,
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
+                pixel_shift);
+        mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
-                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
+                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
+                pixel_shift);
     }else if(IS_8X16(mb_type)){
         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
-                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
+                pixel_shift);
         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
-                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
+                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
+                pixel_shift);
     }else{
         int i;
 
@@ -692,25 +701,30 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                     &weight_op[3], &weight_avg[3],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                    pixel_shift);
             }else if(IS_SUB_8X4(sub_mb_type)){
-                mc_part(h, n  , 0, 2, (4<<h->pixel_shift), dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-                mc_part(h, n+2, 0, 2, (4<<h->pixel_shift), dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                    pixel_shift);
+                mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                    pixel_shift);
             }else if(IS_SUB_4X8(sub_mb_type)){
                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                    pixel_shift);
                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                    pixel_shift);
             }else{
                 int j;
                 assert(IS_SUB_4X4(sub_mb_type));
@@ -720,15 +734,32 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                         &weight_op[6], &weight_avg[6],
-                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                        pixel_shift);
                 }
             }
         }
     }
 
-    prefetch_motion(h, 1);
+    prefetch_motion(h, 1, pixel_shift);
 }
 
+#define hl_motion_fn(sh, bits) \
+static av_always_inline void hl_motion_ ## bits(H264Context *h, \
+                                       uint8_t *dest_y, \
+                                       uint8_t *dest_cb, uint8_t *dest_cr, \
+                                       qpel_mc_func (*qpix_put)[16], \
+                                       h264_chroma_mc_func (*chroma_put), \
+                                       qpel_mc_func (*qpix_avg)[16], \
+                                       h264_chroma_mc_func (*chroma_avg), \
+                                       h264_weight_func *weight_op, \
+                                       h264_biweight_func *weight_avg) \
+{ \
+    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
+              qpix_avg, chroma_avg, weight_op, weight_avg, sh); \
+}
+hl_motion_fn(0, 8);
+hl_motion_fn(1, 16);
 
 static void free_tables(H264Context *h, int free_rbsp){
     int i;
@@ -1002,7 +1033,6 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
 
     ff_h264_decode_init_vlc();
 
-    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
     h->pixel_shift = 0;
     h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
 
@@ -1030,7 +1060,6 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
     return 0;
 }
 
-
 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
 static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
 {
@@ -1708,10 +1737,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
             if(h->deblocking_filter)
                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
         }else if(is_h264){
-            hl_motion(h, dest_y, dest_cb, dest_cr,
-                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
-                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
-                      h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
+            if (pixel_shift) {
+                hl_motion_16(h, dest_y, dest_cb, dest_cr,
+                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                             h->h264dsp.weight_h264_pixels_tab,
+                             h->h264dsp.biweight_h264_pixels_tab);
+            } else
+                hl_motion_8(h, dest_y, dest_cb, dest_cr,
+                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                            h->h264dsp.weight_h264_pixels_tab,
+                            h->h264dsp.biweight_h264_pixels_tab);
         }
 
 

From b95d19b02022ea9590bee571aa245ab93f37d152 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Mon, 13 Jun 2011 08:56:54 +0200
Subject: [PATCH 768/830] Add some (important) changelog entries

---
 Changelog | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Changelog b/Changelog
index da948c6d73..38f07b1440 100644
--- a/Changelog
+++ b/Changelog
@@ -5,6 +5,10 @@ releases are sorted from youngest to oldest.
 version <next>:
 
 - E-AC-3 audio encoder
+- ac3enc: add channel coupling support
+- floating-point sample format support to the ac3, eac3, dca, aac, and vorbis decoders.
+- H264/MPEG frame-level multi-threading
+- All av_metadata_* functions renamed to av_dict_* and moved to libavutil
 
 
 version 0.7_beta2:

From 5c46ad1da049f16e670d2549161c244c6ddd68ec Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 11 Jun 2011 22:53:32 +0100
Subject: [PATCH 769/830] ARM: optimised mpadsp_apply_window_fixed

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/Makefile                   |   3 +
 libavcodec/arm/mpegaudiodsp_fixed_armv6.S | 143 ++++++++++++++++++++++
 libavcodec/arm/mpegaudiodsp_init_arm.c    |  33 +++++
 libavcodec/mpegaudiodsp.c                 |   1 +
 libavcodec/mpegaudiodsp.h                 |   1 +
 5 files changed, 181 insertions(+)
 create mode 100644 libavcodec/arm/mpegaudiodsp_fixed_armv6.S
 create mode 100644 libavcodec/arm/mpegaudiodsp_init_arm.c

diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index a5abfdd128..3374f0e2bd 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -5,6 +5,9 @@ OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \
 
 ARMV6-OBJS-$(CONFIG_AC3DSP)            += arm/ac3dsp_armv6.o
 
+OBJS-$(CONFIG_MPEGAUDIODSP)            += arm/mpegaudiodsp_init_arm.o
+ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
+
 OBJS-$(CONFIG_VP5_DECODER)             += arm/vp56dsp_init_arm.o
 OBJS-$(CONFIG_VP6_DECODER)             += arm/vp56dsp_init_arm.o
 OBJS-$(CONFIG_VP8_DECODER)             += arm/vp8dsp_init_arm.o
diff --git a/libavcodec/arm/mpegaudiodsp_fixed_armv6.S b/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
new file mode 100644
index 0000000000..9ec731480b
--- /dev/null
+++ b/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+.macro  skip            args:vararg
+.endm
+
+.macro  sum8            lo,  hi,  w, p, t1, t2, t3, t4, rsb=skip, offs=0
+        ldr             \t1, [\w, #4*\offs]
+        ldr             \t2, [\p, #4]!
+        \rsb            \t1, \t1, #0
+  .irpc i, 135
+        ldr             \t3, [\w, #4*64*\i+4*\offs]
+        ldr             \t4, [\p, #4*64*\i]
+        smlal           \lo, \hi, \t1, \t2
+        \rsb            \t3, \t3, #0
+        ldr             \t1, [\w, #4*64*(\i+1)+4*\offs]
+        ldr             \t2, [\p, #4*64*(\i+1)]
+        smlal           \lo, \hi, \t3, \t4
+        \rsb            \t1, \t1, #0
+  .endr
+        ldr             \t3, [\w, #4*64*7+4*\offs]
+        ldr             \t4, [\p, #4*64*7]
+        smlal           \lo, \hi, \t1, \t2
+        \rsb            \t3, \t3, #0
+        smlal           \lo, \hi, \t3, \t4
+.endm
+
+.macro  round           rd,  lo,  hi
+        lsr             \rd, \lo, #24
+        bic             \lo, \lo, #0xff000000
+        orr             \rd, \rd, \hi, lsl #8
+        mov             \hi, #0
+        ssat            \rd, #16, \rd
+.endm
+
+function ff_mpadsp_apply_window_fixed_armv6, export=1
+        push            {r2,r4-r11,lr}
+
+        add             r4,  r0,  #4*512        @ synth_buf + 512
+    .rept 4
+        ldm             r0!, {r5-r12}
+        stm             r4!, {r5-r12}
+    .endr
+
+        ldr             r4,  [sp, #40]          @ incr
+        sub             r0,  r0,  #4*17         @ synth_buf + 16
+        ldr             r8,  [r2]               @ sum:low
+        add             r2,  r0,  #4*32         @ synth_buf + 48
+        rsb             r5,  r4,  r4,  lsl #5   @ 31 * incr
+        lsl             r4,  r4,  #1
+        asr             r9,  r8,  #31           @ sum:high
+        add             r5,  r3,  r5,  lsl #1   @ samples2
+        add             r6,  r1,  #4*32         @ w2
+        str             r4,  [sp, #40]
+
+        sum8            r8,  r9,  r1,  r0,  r10, r11, r12, lr
+        sum8            r8,  r9,  r1,  r2,  r10, r11, r12, lr, rsb, 32
+        round           r10, r8,  r9
+        strh            r10, [r3], r4
+
+        mov             lr,  #15
+1:
+        ldr             r12, [r0, #4]!
+        ldr             r11, [r6, #-4]!
+        ldr             r10, [r1, #4]!
+  .irpc i, 0246
+    .if \i
+        ldr             r11, [r6, #4*64*\i]
+        ldr             r10, [r1, #4*64*\i]
+    .endif
+        rsb             r11, r11, #0
+        smlal           r8,  r9,  r10, r12
+        ldr             r10, [r0, #4*64*(\i+1)]
+    .ifeq \i
+        smull           r4, r7, r11, r12
+    .else
+        smlal           r4, r7, r11, r12
+    .endif
+        ldr             r11, [r6, #4*64*(\i+1)]
+        ldr             r12, [r1, #4*64*(\i+1)]
+        rsb             r11, r11, #0
+        smlal           r8,  r9,  r12, r10
+    .iflt \i-6
+        ldr             r12, [r0, #4*64*(\i+2)]
+    .else
+        ldr             r12, [r2, #-4]!
+    .endif
+        smlal           r4,  r7,  r11, r10
+  .endr
+  .irpc i, 0246
+        ldr             r10, [r1, #4*64*\i+4*32]
+        rsb             r12, r12, #0
+        ldr             r11, [r6, #4*64*\i+4*32]
+        smlal           r8,  r9,  r10, r12
+        ldr             r10, [r2, #4*64*(\i+1)]
+        smlal           r4,  r7,  r11, r12
+        ldr             r12, [r1, #4*64*(\i+1)+4*32]
+        rsb             r10, r10, #0
+        ldr             r11, [r6, #4*64*(\i+1)+4*32]
+        smlal           r8,  r9,  r12, r10
+    .iflt \i-6
+        ldr             r12, [r2, #4*64*(\i+2)]
+    .else
+        ldr             r12, [sp, #40]
+    .endif
+        smlal           r4,  r7,  r11, r10
+  .endr
+        round           r10, r8,  r9
+        adds            r8,  r8,  r4
+        adc             r9,  r9,  r7
+        strh            r10, [r3], r12
+        round           r11, r8,  r9
+        subs            lr,  lr,  #1
+        strh            r11, [r5], -r12
+        bgt             1b
+
+        sum8            r8,  r9,  r1,  r0,  r10, r11, r12, lr, rsb, 33
+        pop             {r4}
+        round           r10, r8,  r9
+        str             r8,  [r4]
+        strh            r10, [r3]
+
+        pop             {r4-r11,pc}
+endfunc
diff --git a/libavcodec/arm/mpegaudiodsp_init_arm.c b/libavcodec/arm/mpegaudiodsp_init_arm.c
new file mode 100644
index 0000000000..94a55787ad
--- /dev/null
+++ b/libavcodec/arm/mpegaudiodsp_init_arm.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include "libavcodec/mpegaudiodsp.h"
+#include "config.h"
+
+void ff_mpadsp_apply_window_fixed_armv6(int32_t *synth_buf, int32_t *window,
+                                        int *dither, int16_t *out, int incr);
+
+void ff_mpadsp_init_arm(MPADSPContext *s)
+{
+    if (HAVE_ARMV6) {
+        s->apply_window_fixed = ff_mpadsp_apply_window_fixed_armv6;
+    }
+}
diff --git a/libavcodec/mpegaudiodsp.c b/libavcodec/mpegaudiodsp.c
index 57fe962b91..438b097d06 100644
--- a/libavcodec/mpegaudiodsp.c
+++ b/libavcodec/mpegaudiodsp.c
@@ -35,6 +35,7 @@ void ff_mpadsp_init(MPADSPContext *s)
     s->dct32_float = dct.dct32;
     s->dct32_fixed = ff_dct32_fixed;
 
+    if (ARCH_ARM)     ff_mpadsp_init_arm(s);
     if (HAVE_MMX)     ff_mpadsp_init_mmx(s);
     if (HAVE_ALTIVEC) ff_mpadsp_init_altivec(s);
 }
diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h
index a47019cc4b..8a18db8325 100644
--- a/libavcodec/mpegaudiodsp.h
+++ b/libavcodec/mpegaudiodsp.h
@@ -47,6 +47,7 @@ void ff_mpa_synth_filter_float(MPADSPContext *s,
                                float *samples, int incr,
                                float *sb_samples);
 
+void ff_mpadsp_init_arm(MPADSPContext *s);
 void ff_mpadsp_init_mmx(MPADSPContext *s);
 void ff_mpadsp_init_altivec(MPADSPContext *s);
 

From 77cdfde73e91cdbcc82cdec6b8fec6f646b02782 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 13 Jun 2011 09:24:27 +0100
Subject: [PATCH 770/830] ARM: jrevdct_arm: misc cleanup

- use 'const' macro to define coeff table
- add missing endfunc
- remove superfluous directives

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/jrevdct_arm.S | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/libavcodec/arm/jrevdct_arm.S b/libavcodec/arm/jrevdct_arm.S
index 4fcf35101d..f7fd5c7bde 100644
--- a/libavcodec/arm/jrevdct_arm.S
+++ b/libavcodec/arm/jrevdct_arm.S
@@ -54,18 +54,15 @@
 #define FIX_M_1_961570560_ID   40
 #define FIX_M_2_562915447_ID   44
 #define FIX_0xFFFF_ID          48
-        .text
-        .align
 
 function ff_j_rev_dct_arm, export=1
         stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs
-
         sub sp, sp, #4                  @ reserve some space on the stack
         str r0, [ sp ]                  @ save the DCT pointer to the stack
 
         mov lr, r0                      @ lr = pointer to the current row
         mov r12, #8                     @ r12 = row-counter
-        adr r11, const_array            @ r11 = base pointer to the constants array
+        movrel r11, const_array         @ r11 = base pointer to the constants array
 row_loop:
         ldrsh r0, [lr, # 0]             @ r0 = 'd0'
         ldrsh r2, [lr, # 2]             @ r2 = 'd2'
@@ -370,9 +367,9 @@ the_end:
         @ The end....
         add sp, sp, #4
         ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return
+endfunc
 
-const_array:
-        .align
+const const_array
         .word FIX_0_298631336
         .word FIX_0_541196100
         .word FIX_0_765366865
@@ -386,3 +383,4 @@ const_array:
         .word FIX_M_1_961570560
         .word FIX_M_2_562915447
         .word FIX_0xFFFF
+endconst

From 13743c7ab00be270a3a38c8ee6442336774e335c Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 13 Jun 2011 09:30:42 +0100
Subject: [PATCH 771/830] ARM: jrevdct_arm: use push/pop mnemonics

Use push/pop instead of stmdb/ldmia for stack operations.  This
is the preferred syntax.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/jrevdct_arm.S | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/arm/jrevdct_arm.S b/libavcodec/arm/jrevdct_arm.S
index f7fd5c7bde..08d42d7758 100644
--- a/libavcodec/arm/jrevdct_arm.S
+++ b/libavcodec/arm/jrevdct_arm.S
@@ -56,7 +56,7 @@
 #define FIX_0xFFFF_ID          48
 
 function ff_j_rev_dct_arm, export=1
-        stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs
+        push {r4 - r12, lr}
         sub sp, sp, #4                  @ reserve some space on the stack
         str r0, [ sp ]                  @ save the DCT pointer to the stack
 
@@ -99,7 +99,7 @@ row_loop:
         add r4, r6, r3, lsl #13             @ r4 = tmp11
         rsb r3, r6, r3, lsl #13             @ r3 = tmp12
 
-        stmdb   sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
+        push {r0, r2, r3, r4} @ save on the stack tmp10, tmp13, tmp12, tmp11
 
         ldrsh r3, [lr, #10]             @ r3 = 'd3'
         ldrsh r5, [lr, #12]             @ r5 = 'd5'
@@ -133,8 +133,8 @@ row_loop:
         add r3, r3, r4                  @ r3 = tmp2
         add r1, r1, r6                  @ r1 = tmp3
 
-        ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
-                                      @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
+        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
+                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
 
         @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
         add r8, r0, r1
@@ -242,7 +242,7 @@ column_loop:
         orrs r10, r9, r10
         beq empty_odd_column
 
-        stmdb   sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11
+        push {r0, r2, r4, r6} @ save on the stack tmp10, tmp13, tmp12, tmp11
 
         add r0, r3, r5                  @ r0 = 'z2'
         add r2, r1, r7                  @ r2 = 'z1'
@@ -272,8 +272,8 @@ column_loop:
         add r3, r3, r4                  @ r3 = tmp2
         add r1, r1, r6                  @ r1 = tmp3
 
-        ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
-                                      @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
+        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
+                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
 
         @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
         add r8, r0, r1
@@ -366,7 +366,7 @@ empty_odd_column:
 the_end:
         @ The end....
         add sp, sp, #4
-        ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return
+        pop {r4 - r12, pc}
 endfunc
 
 const const_array

From 9776e25db9a43e77e9b091c012bf16267d9559d7 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 13 Jun 2011 09:33:20 +0100
Subject: [PATCH 772/830] ARM: jrevdct_arm: simplify stack usage

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/jrevdct_arm.S | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/libavcodec/arm/jrevdct_arm.S b/libavcodec/arm/jrevdct_arm.S
index 08d42d7758..93cbbbe8eb 100644
--- a/libavcodec/arm/jrevdct_arm.S
+++ b/libavcodec/arm/jrevdct_arm.S
@@ -56,9 +56,7 @@
 #define FIX_0xFFFF_ID          48
 
 function ff_j_rev_dct_arm, export=1
-        push {r4 - r12, lr}
-        sub sp, sp, #4                  @ reserve some space on the stack
-        str r0, [ sp ]                  @ save the DCT pointer to the stack
+        push {r0, r4 - r11, lr}
 
         mov lr, r0                      @ lr = pointer to the current row
         mov r12, #8                     @ r12 = row-counter
@@ -208,7 +206,7 @@ end_of_row_loop:
 
 start_column_loop:
         @ Start of column loop
-        ldr lr, [ sp ]
+        pop {lr}
         mov r12, #8
 column_loop:
         ldrsh r0, [lr, #( 0*8)]             @ r0 = 'd0'
@@ -365,8 +363,7 @@ empty_odd_column:
 
 the_end:
         @ The end....
-        add sp, sp, #4
-        pop {r4 - r12, pc}
+        pop {r4 - r11, pc}
 endfunc
 
 const const_array

From 223694b404e63211ad7d1bd2916c070a0b587703 Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs <aurel@gnuage.org>
Date: Mon, 13 Jun 2011 18:28:54 +0200
Subject: [PATCH 773/830] nsv: return error code instead of discarding it in
 read_header()

---
 libavformat/nsvdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index b1efa5bd9c..81c672e388 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -531,7 +531,7 @@ static int nsv_read_header(AVFormatContext *s, AVFormatParameters *ap)
     err = nsv_read_chunk(s, 1);
 
     av_dlog(s, "parsed header\n");
-    return 0;
+    return err;
 }
 
 static int nsv_read_chunk(AVFormatContext *s, int fill_header)

From eb8de45c3c55721f0fc02cefcf1d08980b7ab67d Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs <aurel@gnuage.org>
Date: Mon, 13 Jun 2011 18:30:55 +0200
Subject: [PATCH 774/830] nsv: simplify probe function

---
 libavformat/nsvdec.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index 81c672e388..4898187f3a 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -21,6 +21,7 @@
 #include "avformat.h"
 #include "riff.h"
 #include "libavutil/dict.h"
+#include "libavutil/intreadwrite.h"
 
 //#define DEBUG_DUMP_INDEX // XXX dumbdriving-271.nsv breaks with it commented!!
 #define CHECK_SUBSEQUENT_NSVS
@@ -736,10 +737,8 @@ static int nsv_read_close(AVFormatContext *s)
 
 static int nsv_probe(AVProbeData *p)
 {
-    int i;
-    int score;
-    int vsize, asize, auxcount;
-    score = 0;
+    int i, score = 0;
+
     av_dlog(NULL, "nsv_probe(), buf_size %d\n", p->buf_size);
     /* check file header */
     /* streamed files might not have any header */
@@ -751,19 +750,14 @@ static int nsv_probe(AVProbeData *p)
     /* seems the servers don't bother starting clean chunks... */
     /* sometimes even the first header is at 9KB or something :^) */
     for (i = 1; i < p->buf_size - 3; i++) {
-        if (p->buf[i+0] == 'N' && p->buf[i+1] == 'S' &&
-            p->buf[i+2] == 'V' && p->buf[i+3] == 's') {
-            score = AVPROBE_SCORE_MAX/5;
+        if (AV_RL32(p->buf + i) == AV_RL32("NSVs")) {
             /* Get the chunk size and check if at the end we are getting 0xBEEF */
-            auxcount = p->buf[i+19];
-            vsize = p->buf[i+20]  | p->buf[i+21] << 8;
-            asize = p->buf[i+22]  | p->buf[i+23] << 8;
-            vsize = (vsize << 4) | (auxcount >> 4);
-            if ((asize + vsize + i + 23) <  p->buf_size - 2) {
-                if (p->buf[i+23+asize+vsize+1] == 0xEF &&
-                    p->buf[i+23+asize+vsize+2] == 0xBE)
-                    return AVPROBE_SCORE_MAX-20;
-            }
+            int vsize = AV_RL24(p->buf+i+19) >> 4;
+            int asize = AV_RL16(p->buf+i+22);
+            int offset = i + 23 + asize + vsize + 1;
+            if (offset <= p->buf_size - 2 && AV_RL16(p->buf + offset) == 0xBEEF)
+                return 4*AVPROBE_SCORE_MAX/5;
+            score = AVPROBE_SCORE_MAX/5;
         }
     }
     /* so we'll have more luck on extension... */

From 7ebaa967a2c5b038023409179ca3c56f5b4f4ed3 Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs <aurel@gnuage.org>
Date: Mon, 13 Jun 2011 18:37:32 +0200
Subject: [PATCH 775/830] matroskaenc: write colourspace element for rawvideo
 tracks

---
 libavformat/matroskaenc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 9901dc0b26..50e931e5c2 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -614,6 +614,9 @@ static int mkv_write_tracks(AVFormatContext *s)
                     put_ebml_uint(pb, MATROSKA_ID_VIDEODISPLAYHEIGHT, codec->height);
                     put_ebml_uint(pb, MATROSKA_ID_VIDEODISPLAYUNIT, 3);
                 }
+
+                if (codec->codec_id == CODEC_ID_RAWVIDEO)
+                    put_ebml_binary(pb, MATROSKA_ID_VIDEOCOLORSPACE, &codec->codec_tag, 4);
                 end_ebml_master(pb, subinfo);
                 break;
 

From e6ba3d428105edbd3c1a468ece619f801151eea2 Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs <aurel@gnuage.org>
Date: Mon, 13 Jun 2011 19:02:50 +0200
Subject: [PATCH 776/830] replace remaining usage of deprecated
 av_metadata_set2() by av_dict_set()

---
 libavformat/aiffdec.c     |  2 +-
 libavformat/matroskadec.c |  6 +++---
 libavformat/mov.c         |  2 +-
 libavformat/wav.c         | 10 +++++-----
 libavformat/wtvdec.c      |  6 +++---
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/libavformat/aiffdec.c b/libavformat/aiffdec.c
index 21f28e384f..9608910340 100644
--- a/libavformat/aiffdec.c
+++ b/libavformat/aiffdec.c
@@ -78,7 +78,7 @@ static void get_meta(AVFormatContext *s, const char *key, int size)
         }
         size += (size&1)-res;
         str[res] = 0;
-        av_metadata_set2(&s->metadata, key, str, AV_METADATA_DONT_STRDUP_VAL);
+        av_dict_set(&s->metadata, key, str, AV_METADATA_DONT_STRDUP_VAL);
     }else
         size+= size&1;
 
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 8fafbb33bf..bd71befcf9 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1515,7 +1515,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
 
             /* export stereo mode flag as metadata tag */
             if (track->video.stereo_mode && track->video.stereo_mode < MATROSKA_VIDEO_STEREO_MODE_COUNT)
-                av_metadata_set2(&st->metadata, "stereo_mode", matroska_video_stereo_mode[track->video.stereo_mode], 0);
+                av_dict_set(&st->metadata, "stereo_mode", matroska_video_stereo_mode[track->video.stereo_mode], 0);
 
             /* if we have virtual track, mark the real tracks */
             for (j=0; j < track->operation.combine_planes.nb_elem; j++) {
@@ -1526,8 +1526,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
                          matroska_video_stereo_plane[planes[j].type], i);
                 for (k=0; k < matroska->tracks.nb_elem; k++)
                     if (planes[j].uid == tracks[k].uid) {
-                        av_metadata_set2(&s->streams[k]->metadata,
-                                         "stereo_mode", buf, 0);
+                        av_dict_set(&s->streams[k]->metadata,
+                                    "stereo_mode", buf, 0);
                         break;
                     }
             }
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 6a49f37741..088df2cb4c 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1928,7 +1928,7 @@ static int mov_read_tkhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     sc->height = height >> 16;
 
     if (display_matrix[0][0] == -65536 && display_matrix[1][1] == -65536) {
-         av_metadata_set2(&st->metadata, "rotate", "180", 0);
+         av_dict_set(&st->metadata, "rotate", "180", 0);
     }
 
     // transform the display width/height according to the matrix
diff --git a/libavformat/wav.c b/libavformat/wav.c
index 5c4b7d3a6b..c5dbd631b4 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -216,7 +216,7 @@ static inline int wav_parse_bext_string(AVFormatContext *s, const char *key, int
     temp[length] = 0;
 
     if (strlen(temp))
-        return av_metadata_set2(&s->metadata, key, temp, 0);
+        return av_dict_set(&s->metadata, key, temp, 0);
 
     return 0;
 }
@@ -237,7 +237,7 @@ static int wav_parse_bext_tag(AVFormatContext *s, int64_t size)
 
     time_reference = avio_rl64(s->pb);
     snprintf(temp, sizeof(temp), "%"PRIu64, time_reference);
-    if ((ret = av_metadata_set2(&s->metadata, "time_reference", temp, 0)) < 0)
+    if ((ret = av_dict_set(&s->metadata, "time_reference", temp, 0)) < 0)
         return ret;
 
     /* check if version is >= 1, in which case an UMID may be present */
@@ -259,7 +259,7 @@ static int wav_parse_bext_tag(AVFormatContext *s, int64_t size)
                          umid_parts[4], umid_parts[5], umid_parts[6], umid_parts[7]);
             }
 
-            if ((ret = av_metadata_set2(&s->metadata, "umid", temp, 0)) < 0)
+            if ((ret = av_dict_set(&s->metadata, "umid", temp, 0)) < 0)
                 return ret;
         }
 
@@ -278,8 +278,8 @@ static int wav_parse_bext_tag(AVFormatContext *s, int64_t size)
             return ret;
 
         coding_history[size] = 0;
-        if ((ret = av_metadata_set2(&s->metadata, "coding_history", coding_history,
-                                    AV_METADATA_DONT_STRDUP_VAL)) < 0)
+        if ((ret = av_dict_set(&s->metadata, "coding_history", coding_history,
+                               AV_METADATA_DONT_STRDUP_VAL)) < 0)
             return ret;
     }
 
diff --git a/libavformat/wtvdec.c b/libavformat/wtvdec.c
index 5cbec0576a..5fe7e9fe12 100644
--- a/libavformat/wtvdec.c
+++ b/libavformat/wtvdec.c
@@ -431,7 +431,7 @@ static void get_attachment(AVFormatContext *s, AVIOContext *pb, int length)
     st = av_new_stream(s, 0);
     if (!st)
         goto done;
-    av_metadata_set2(&st->metadata, "title", description, 0);
+    av_dict_set(&st->metadata, "title", description, 0);
     st->codec->codec_id   = CODEC_ID_MJPEG;
     st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT;
     st->codec->extradata  = av_mallocz(filesize);
@@ -494,7 +494,7 @@ static void get_tag(AVFormatContext *s, AVIOContext *pb, const char *key, int ty
         return;
     }
 
-    av_metadata_set2(&s->metadata, key, buf, 0);
+    av_dict_set(&s->metadata, key, buf, 0);
     av_freep(&buf);
 }
 
@@ -818,7 +818,7 @@ static int parse_chunks(AVFormatContext *s, int mode, int64_t seekts, int *len_p
                 avio_read(pb, language, 3);
                 if (language[0]) {
                     language[3] = 0;
-                    av_metadata_set2(&st->metadata, "language", language, 0);
+                    av_dict_set(&st->metadata, "language", language, 0);
                     if (!strcmp(language, "nar") || !strcmp(language, "NAR"))
                         st->disposition |= AV_DISPOSITION_VISUAL_IMPAIRED;
                 }

From 294e5475c2cf4bb7a0db34931515fddc5732c266 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 1 Apr 2011 12:46:36 +0200
Subject: [PATCH 777/830] ffv1: fix undefined behavior with insane widths.

The new table is large enough to prevent this together with our image size checks.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/bitstream.c | 6 ++++--
 libavcodec/ffv1.c      | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/libavcodec/bitstream.c b/libavcodec/bitstream.c
index b593db55ce..17b1c69c84 100644
--- a/libavcodec/bitstream.c
+++ b/libavcodec/bitstream.c
@@ -32,11 +32,13 @@
 #include "get_bits.h"
 #include "put_bits.h"
 
-const uint8_t ff_log2_run[32]={
+const uint8_t ff_log2_run[41]={
  0, 0, 0, 0, 1, 1, 1, 1,
  2, 2, 2, 2, 3, 3, 3, 3,
  4, 4, 5, 5, 6, 6, 7, 7,
- 8, 9,10,11,12,13,14,15
+ 8, 9,10,11,12,13,14,15,
+16,17,18,19,20,21,22,23,
+24,
 };
 
 void align_put_bits(PutBitContext *s)
diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index 53edbb3459..50f1062ad4 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -40,7 +40,7 @@
 #define MAX_QUANT_TABLES 8
 #define MAX_CONTEXT_INPUTS 5
 
-extern const uint8_t ff_log2_run[32];
+extern const uint8_t ff_log2_run[41];
 
 static const int8_t quant3[256]={
  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

From a31d4b3a9929e31eb2df949cab09422267f5a5e3 Mon Sep 17 00:00:00 2001
From: Peter Ross <pross@xvid.org>
Date: Fri, 1 Apr 2011 23:11:24 +1100
Subject: [PATCH 778/830] img2: add .dpx to the list of supported file
 extensions.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/img2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/img2.c b/libavformat/img2.c
index 4e82aa301b..ec37a38845 100644
--- a/libavformat/img2.c
+++ b/libavformat/img2.c
@@ -496,7 +496,7 @@ AVInputFormat ff_image2pipe_demuxer = {
 AVOutputFormat ff_image2_muxer = {
     .name           = "image2",
     .long_name      = NULL_IF_CONFIG_SMALL("image2 sequence"),
-    .extensions     = "bmp,jpeg,jpg,ljpg,pam,pbm,pcx,pgm,pgmyuv,png,"
+    .extensions     = "bmp,dpx,jpeg,jpg,ljpg,pam,pbm,pcx,pgm,pgmyuv,png,"
                       "ppm,sgi,tga,tif,tiff,jp2",
     .priv_data_size = sizeof(VideoData),
     .video_codec    = CODEC_ID_MJPEG,

From 86961eeabf06ba4ef920789cb7596e8590e82713 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Fri, 1 Apr 2011 19:42:31 +0200
Subject: [PATCH 779/830] ac3dec: fix doxy-style for comment ("///>" should be
 "///<" instead).

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/ac3dec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h
index f0ab75ae98..590bee6b6d 100644
--- a/libavcodec/ac3dec.h
+++ b/libavcodec/ac3dec.h
@@ -194,7 +194,7 @@ typedef struct {
 ///@}
 
 ///@defgroup arrays aligned arrays
-    DECLARE_ALIGNED(16, int,   fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];       ///> fixed-point transform coefficients
+    DECLARE_ALIGNED(16, int,   fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];       ///< fixed-point transform coefficients
     DECLARE_ALIGNED(32, float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< transform coefficients
     DECLARE_ALIGNED(32, float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];             ///< delay - added to the next block
     DECLARE_ALIGNED(32, float, window)[AC3_BLOCK_SIZE];                              ///< window coefficients

From 33aec3f402c07e417ac1492717ab746927253727 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 2 Apr 2011 17:57:53 +0200
Subject: [PATCH 780/830] h264: change a few comments into error messages

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/h264.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 0aa923fdbb..e1fcb62a7b 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2339,8 +2339,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         MPV_common_end(s);
     }
     if (!s->context_initialized) {
-        if(h != h0)
-            return -1;  // we cant (re-)initialize context during parallel decoding
+        if (h != h0) {
+            av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
+            return -1;
+        }
 
         avcodec_set_dimensions(s->avctx, s->width, s->height);
         s->avctx->sample_aspect_ratio= h->sps.sar;
@@ -2384,8 +2386,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
 
         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
 
-        if (MPV_common_init(s) < 0)
+        if (MPV_common_init(s) < 0) {
+            av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
             return -1;
+        }
         s->first_field = 0;
         h->prev_interlaced_frame = 1;
 
@@ -2393,8 +2397,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         ff_h264_alloc_tables(h);
 
         if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
-            if (context_init(h) < 0)
+            if (context_init(h) < 0) {
+                av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
                 return -1;
+            }
         } else {
             for(i = 1; i < s->avctx->thread_count; i++) {
                 H264Context *c;
@@ -2410,8 +2416,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
             }
 
             for(i = 0; i < s->avctx->thread_count; i++)
-                if(context_init(h->thread_context[i]) < 0)
+                if (context_init(h->thread_context[i]) < 0) {
+                    av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
                     return -1;
+                }
         }
     }
 
@@ -2706,8 +2714,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
                 h0->single_decode_warning = 1;
             }
-            if(h != h0)
-                return 1; // deblocking switched inside frame
+            if (h != h0) {
+                av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
+                return 1;
+            }
         }
     }
     h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);

From 3de33b00de219a749f3e5c9d1f1f68d7cfa58502 Mon Sep 17 00:00:00 2001
From: Wim Lewis <wiml@hhhh.org>
Date: Sat, 2 Apr 2011 15:30:24 -0700
Subject: [PATCH 781/830] avcodec.h: add or elaborate on some documentation
 comments.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/avcodec.h | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index ce3a4a6b94..a70d8adb90 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3253,7 +3253,9 @@ void av_resample_compensate(struct AVResampleContext *c, int sample_delta, int c
 void av_resample_close(struct AVResampleContext *c);
 
 /**
- * Allocate memory for a picture.  Call avpicture_free to free it.
+ * Allocate memory for a picture.  Call avpicture_free() to free it.
+ *
+ * \see avpicture_fill()
  *
  * @param picture the picture to be filled in
  * @param pix_fmt the format of the picture
@@ -3265,6 +3267,8 @@ int avpicture_alloc(AVPicture *picture, enum PixelFormat pix_fmt, int width, int
 
 /**
  * Free a picture previously allocated by avpicture_alloc().
+ * The data buffer used by the AVPicture is freed, but the AVPicture structure
+ * itself is not.
  *
  * @param picture the AVPicture to be freed
  */
@@ -3280,6 +3284,9 @@ void avpicture_free(AVPicture *picture);
  * will be stored in the lines_sizes array.
  * Call with ptr == NULL to get the required size for the ptr buffer.
  *
+ * To allocate the buffer and fill in the AVPicture fields in one call,
+ * use avpicture_alloc().
+ *
  * @param picture AVPicture whose fields are to be filled in
  * @param ptr Buffer which will contain or contains the actual image data
  * @param pix_fmt The format in which the picture data is stored.
@@ -3289,6 +3296,22 @@ void avpicture_free(AVPicture *picture);
  */
 int avpicture_fill(AVPicture *picture, uint8_t *ptr,
                    enum PixelFormat pix_fmt, int width, int height);
+
+/**
+ * Copy pixel data from an AVPicture into a buffer.
+ * The data is stored compactly, without any gaps for alignment or padding
+ * which may be applied by avpicture_fill().
+ *
+ * \see avpicture_get_size()
+ *
+ * @param[in] src AVPicture containing image data
+ * @param[in] pix_fmt The format in which the picture data is stored.
+ * @param[in] width the width of the image in pixels.
+ * @param[in] height the height of the image in pixels.
+ * @param[out] dest A buffer into which picture data will be copied.
+ * @param[in] dest_size The size of 'dest'.
+ * @return The number of bytes written to dest, or a negative value (error code) on error.
+ */
 int avpicture_layout(const AVPicture* src, enum PixelFormat pix_fmt, int width, int height,
                      unsigned char *dest, int dest_size);
 
@@ -3296,8 +3319,8 @@ int avpicture_layout(const AVPicture* src, enum PixelFormat pix_fmt, int width,
  * Calculate the size in bytes that a picture of the given width and height
  * would occupy if stored in the given picture format.
  * Note that this returns the size of a compact representation as generated
- * by avpicture_layout, which can be smaller than the size required for e.g.
- * avpicture_fill.
+ * by avpicture_layout(), which can be smaller than the size required for e.g.
+ * avpicture_fill().
  *
  * @param pix_fmt the given picture format
  * @param width the width of the image
@@ -3426,16 +3449,19 @@ const char *avcodec_license(void);
 
 /**
  * Initialize libavcodec.
+ * If called more than once, does nothing.
  *
  * @warning This function must be called before any other libavcodec
  * function.
+ *
+ * @warning This function is not thread-safe.
  */
 void avcodec_init(void);
 
 /**
  * Register the codec codec and initialize libavcodec.
  *
- * @see avcodec_init()
+ * @see avcodec_init(), avcodec_register_all()
  */
 void avcodec_register(AVCodec *codec);
 
@@ -3615,7 +3641,7 @@ int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2,
  * @param avctx The context which will be set up to use the given codec.
  * @param codec The codec to use within the context.
  * @return zero on success, a negative value on error
- * @see avcodec_alloc_context, avcodec_find_decoder, avcodec_find_encoder
+ * @see avcodec_alloc_context, avcodec_find_decoder, avcodec_find_encoder, avcodec_close
  */
 int avcodec_open(AVCodecContext *avctx, AVCodec *codec);
 

From 108f318d908c552d88bc7570515e4ddb9ea45e3d Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 1 Mar 2011 15:59:00 +0100
Subject: [PATCH 782/830] h264: don't be so picky on decoding pps in extradata.

Fixes issue2517

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/h264.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index e1fcb62a7b..276d6e6d6c 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -995,7 +995,7 @@ int ff_h264_decode_extradata(H264Context *h)
         cnt = *(p++); // Number of pps
         for (i = 0; i < cnt; i++) {
             nalsize = AV_RB16(p) + 2;
-            if(decode_nal_units(h, p, nalsize)  != nalsize) {
+            if (decode_nal_units(h, p, nalsize) < 0) {
                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
                 return -1;
             }

From a8fd2f4e0238d6ddde0db28c5bb4b39d2f98d4ed Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 8 Mar 2011 22:39:14 +0100
Subject: [PATCH 783/830] lavf: initialise reference_dts in
 av_estimate_timings_from_pts.

Fixes issue2437.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/utils.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index dc3b9d8fb8..d0fd0d46ff 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1933,6 +1933,7 @@ static void av_estimate_timings_from_pts(AVFormatContext *ic, int64_t old_offset
         st= ic->streams[i];
         st->cur_dts= st->first_dts;
         st->last_IP_pts = AV_NOPTS_VALUE;
+        st->reference_dts = AV_NOPTS_VALUE;
     }
 }
 

From 8d0786ec6d066f892f29da6593e99e73a7dfd014 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Sun, 3 Apr 2011 22:45:16 +0200
Subject: [PATCH 784/830] wav: remove an invalid free().

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/wav.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavformat/wav.c b/libavformat/wav.c
index 21374e8b93..92c9bfcbc4 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -52,7 +52,6 @@ static int wav_write_header(AVFormatContext *s)
     if (ff_put_wav_header(pb, s->streams[0]->codec) < 0) {
         av_log(s, AV_LOG_ERROR, "%s codec not supported in WAVE format\n",
                s->streams[0]->codec->codec ? s->streams[0]->codec->codec->name : "NONE");
-        av_free(wav);
         return -1;
     }
     ff_end_tag(pb, fmt);

From a26ce1e2df102ad085cf1a7891722ef64b80ea24 Mon Sep 17 00:00:00 2001
From: Philip Langdale <philipl@overt.org>
Date: Mon, 28 Mar 2011 21:42:02 -0700
Subject: [PATCH 785/830] h264_parser: Fix behaviour when
 PARSER_FLAG_COMPLETE_FRAMES is set.

Currently, the parser is buggy and only processes the stream extradata
when the flag is set. This fixes it to actually inspect the frames.

Whitespace will be fixed in a separate change.

Signed-off-by: Philip Langdale <philipl@overt.org>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/h264_parser.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index 621ff02925..a3149534a8 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -270,6 +270,7 @@ static int h264_parse(AVCodecParserContext *s,
             assert(pc->last_index + next >= 0 );
             ff_h264_find_frame_end(h, &pc->buffer[pc->last_index + next], -next); //update state
         }
+    }
 
         parse_nal_units(s, avctx, buf, buf_size);
 
@@ -285,7 +286,6 @@ static int h264_parse(AVCodecParserContext *s,
         if (s->flags & PARSER_FLAG_ONCE) {
             s->flags &= PARSER_FLAG_COMPLETE_FRAMES;
         }
-    }
 
     *poutbuf = buf;
     *poutbuf_size = buf_size;

From 25f05ddb1af4bd398fa92cd135e48fafe23bd92a Mon Sep 17 00:00:00 2001
From: Philip Langdale <philipl@overt.org>
Date: Mon, 28 Mar 2011 21:43:23 -0700
Subject: [PATCH 786/830] h264_parser: Fix whitespace after previous change.

Signed-off-by: Philip Langdale <philipl@overt.org>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/h264_parser.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index a3149534a8..c39baeb739 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -272,20 +272,21 @@ static int h264_parse(AVCodecParserContext *s,
         }
     }
 
-        parse_nal_units(s, avctx, buf, buf_size);
+    parse_nal_units(s, avctx, buf, buf_size);
 
-        if (h->sei_cpb_removal_delay >= 0) {
-            s->dts_sync_point    = h->sei_buffering_period_present;
-            s->dts_ref_dts_delta = h->sei_cpb_removal_delay;
-            s->pts_dts_delta     = h->sei_dpb_output_delay;
-        } else {
-            s->dts_sync_point    = INT_MIN;
-            s->dts_ref_dts_delta = INT_MIN;
-            s->pts_dts_delta     = INT_MIN;
-        }
-        if (s->flags & PARSER_FLAG_ONCE) {
-            s->flags &= PARSER_FLAG_COMPLETE_FRAMES;
-        }
+    if (h->sei_cpb_removal_delay >= 0) {
+        s->dts_sync_point    = h->sei_buffering_period_present;
+        s->dts_ref_dts_delta = h->sei_cpb_removal_delay;
+        s->pts_dts_delta     = h->sei_dpb_output_delay;
+    } else {
+        s->dts_sync_point    = INT_MIN;
+        s->dts_ref_dts_delta = INT_MIN;
+        s->pts_dts_delta     = INT_MIN;
+    }
+
+    if (s->flags & PARSER_FLAG_ONCE) {
+        s->flags &= PARSER_FLAG_COMPLETE_FRAMES;
+    }
 
     *poutbuf = buf;
     *poutbuf_size = buf_size;

From c9c493872c385cff304438ee404e38e55f04af28 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Fri, 3 Jun 2011 01:12:28 -0700
Subject: [PATCH 787/830] 4:4:4 H.264 decoding support

Note: this is 4:4:4 from the 2007 spec revision, not the previous (now deprecated) 4:4:4 mode in H.264.
---
 libavcodec/dsputil.h               |   2 +-
 libavcodec/dsputil_template.c      |   6 +-
 libavcodec/h264.c                  | 760 +++++++++++++++++---------
 libavcodec/h264.h                  | 181 ++++---
 libavcodec/h264_cabac.c            | 819 +++++++++++++++++++++++++----
 libavcodec/h264_cavlc.c            | 198 ++++---
 libavcodec/h264_loopfilter.c       |  70 ++-
 libavcodec/h264_ps.c               |  14 +-
 libavcodec/h264dsp.h               |   8 +-
 libavcodec/h264idct_template.c     |  44 +-
 libavcodec/mpegvideo.c             |  25 +-
 libavcodec/snow.c                  |   6 +-
 libavcodec/x86/dsputil_mmx.c       |   4 +-
 libavcodec/x86/h264_i386.h         |  15 +-
 libavcodec/x86/h264_idct.asm       |  44 +-
 libavcodec/x86/h264_idct_10bit.asm |  35 +-
 16 files changed, 1595 insertions(+), 636 deletions(-)

diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index cfc574aebb..7a28b06fd5 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -507,7 +507,7 @@ typedef struct DSPContext {
 #define BASIS_SHIFT 16
 #define RECON_SHIFT 6
 
-    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int sides);
+    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides);
 #define EDGE_WIDTH 16
 #define EDGE_TOP    1
 #define EDGE_BOTTOM 2
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index 8ca6d3e414..b85931856a 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -79,7 +79,7 @@ static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstS
 
 /* draw the edges of width 'w' of an image of size width, height */
 //FIXME check that this is ok for mpeg4 interlaced
-static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int sides)
+static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int h, int sides)
 {
     pixel *buf = (pixel*)_buf;
     int wrap = _wrap / sizeof(pixel);
@@ -106,10 +106,10 @@ static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, i
     buf -= w;
     last_line = buf + (height - 1) * wrap;
     if (sides & EDGE_TOP)
-        for(i = 0; i < w; i++)
+        for(i = 0; i < h; i++)
             memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
     if (sides & EDGE_BOTTOM)
-        for (i = 0; i < w; i++)
+        for (i = 0; i < h; i++)
             memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
 }
 
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 276d6e6d6c..86ea218807 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -451,12 +451,13 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int src_x_offset, int src_y_offset,
                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
-                           int pixel_shift){
+                           int pixel_shift, int chroma444){
     MpegEncContext * const s = &h->s;
     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
     const int luma_xy= (mx&3) + ((my&3)<<2);
-    uint8_t * src_y = pic->data[0] + ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
+    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
+    uint8_t * src_y = pic->data[0] + offset;
     uint8_t * src_cb, * src_cr;
     int extra_width= h->emu_edge_width;
     int extra_height= h->emu_edge_height;
@@ -483,6 +484,31 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
     }
 
+    if(chroma444){
+        src_cb = pic->data[1] + offset;
+        if(emu){
+            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
+                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
+        }
+        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
+        if(!square){
+            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
+        }
+
+        src_cr = pic->data[2] + offset;
+        if(emu){
+            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
+                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
+        }
+        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
+        if(!square){
+            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
+        }
+        return;
+    }
+
     if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
 
     if(MB_FIELD){
@@ -511,14 +537,19 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
                            int x_offset, int y_offset,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
-                           int list0, int list1, int pixel_shift){
+                           int list0, int list1, int pixel_shift, int chroma444){
     MpegEncContext * const s = &h->s;
     qpel_mc_func *qpix_op=  qpix_put;
     h264_chroma_mc_func chroma_op= chroma_put;
 
-    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->  mb_linesize;
-    dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
-    dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+    if(chroma444){
+        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+    }else{
+        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    }
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -526,7 +557,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op, pixel_shift);
+                           qpix_op, chroma_op, pixel_shift, chroma444);
 
         qpix_op=  qpix_avg;
         chroma_op= chroma_avg;
@@ -536,7 +567,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op, pixel_shift);
+                           qpix_op, chroma_op, pixel_shift, chroma444);
     }
 }
 
@@ -546,12 +577,19 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
-                           int list0, int list1, int pixel_shift){
+                           int list0, int list1, int pixel_shift, int chroma444){
     MpegEncContext * const s = &h->s;
 
-    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->  mb_linesize;
-    dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
-    dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+    if(chroma444){
+        chroma_weight_avg = luma_weight_avg;
+        chroma_weight_op = luma_weight_op;
+        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+    }else{
+        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    }
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -559,17 +597,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         /* don't optimize for luma-only case, since B-frames usually
          * use implicit weights => chroma too. */
         uint8_t *tmp_cb = s->obmc_scratchpad;
-        uint8_t *tmp_cr = s->obmc_scratchpad + (8 << pixel_shift);
-        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
+        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
+        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
         int refn0 = h->ref_cache[0][ scan8[n] ];
         int refn1 = h->ref_cache[1][ scan8[n] ];
 
         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                     dest_y, dest_cb, dest_cr,
-                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
+                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                     tmp_y, tmp_cb, tmp_cr,
-                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
+                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
 
         if(h->use_weight == 2){
             int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
@@ -594,7 +632,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         Picture *ref= &h->ref_list[list][refn];
         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                    qpix_put, chroma_put, pixel_shift);
+                    qpix_put, chroma_put, pixel_shift, chroma444);
 
         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                        h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
@@ -613,21 +651,21 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
-                           int list0, int list1, int pixel_shift){
+                           int list0, int list1, int pixel_shift, int chroma444){
     if((h->use_weight==2 && list0 && list1
         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
        || h->use_weight==1)
         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                          x_offset, y_offset, qpix_put, chroma_put,
                          weight_op[0], weight_op[3], weight_avg[0],
-                         weight_avg[3], list0, list1, pixel_shift);
+                         weight_avg[3], list0, list1, pixel_shift, chroma444);
     else
         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
-                    chroma_avg, list0, list1, pixel_shift);
+                    chroma_avg, list0, list1, pixel_shift, chroma444);
 }
 
-static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){
+static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
     /* fetch pixels for estimated mv 4 macroblocks ahead
      * optimized for 64byte cache lines */
     MpegEncContext * const s = &h->s;
@@ -638,8 +676,13 @@ static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){
         uint8_t **src= h->ref_list[list][refn].data;
         int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
         s->dsp.prefetch(src[0]+off, s->linesize, 4);
-        off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
-        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
+        if(chroma444){
+            s->dsp.prefetch(src[1]+off, s->linesize, 4);
+            s->dsp.prefetch(src[2]+off, s->linesize, 4);
+        }else{
+            off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
+            s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
+        }
     }
 }
 
@@ -647,7 +690,7 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                       h264_weight_func *weight_op, h264_biweight_func *weight_avg,
-                      int pixel_shift){
+                      int pixel_shift, int chroma444){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
@@ -656,36 +699,36 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
 
     if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
         await_references(h);
-    prefetch_motion(h, 0, pixel_shift);
+    prefetch_motion(h, 0, pixel_shift, chroma444);
 
     if(IS_16X16(mb_type)){
         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                 weight_op, weight_avg,
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
-                pixel_shift);
+                pixel_shift, chroma444);
     }else if(IS_16X8(mb_type)){
         mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
-                pixel_shift);
+                pixel_shift, chroma444);
         mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
-                pixel_shift);
+                pixel_shift, chroma444);
     }else if(IS_8X16(mb_type)){
         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
-                pixel_shift);
+                pixel_shift, chroma444);
         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
-                pixel_shift);
+                pixel_shift, chroma444);
     }else{
         int i;
 
@@ -702,29 +745,29 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                     &weight_op[3], &weight_avg[3],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift);
+                    pixel_shift, chroma444);
             }else if(IS_SUB_8X4(sub_mb_type)){
                 mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift);
+                    pixel_shift, chroma444);
                 mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift);
+                    pixel_shift, chroma444);
             }else if(IS_SUB_4X8(sub_mb_type)){
                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift);
+                    pixel_shift, chroma444);
                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift);
+                    pixel_shift, chroma444);
             }else{
                 int j;
                 assert(IS_SUB_4X4(sub_mb_type));
@@ -735,13 +778,13 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                         &weight_op[6], &weight_avg[6],
                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                        pixel_shift);
+                        pixel_shift, chroma444);
                 }
             }
         }
     }
 
-    prefetch_motion(h, 1, pixel_shift);
+    prefetch_motion(h, 1, pixel_shift, chroma444);
 }
 
 #define hl_motion_fn(sh, bits) \
@@ -753,10 +796,11 @@ static av_always_inline void hl_motion_ ## bits(H264Context *h, \
                                        qpel_mc_func (*qpix_avg)[16], \
                                        h264_chroma_mc_func (*chroma_avg), \
                                        h264_weight_func *weight_op, \
-                                       h264_biweight_func *weight_avg) \
+                                       h264_biweight_func *weight_avg, \
+                                       int chroma444) \
 { \
     hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
-              qpix_avg, chroma_avg, weight_op, weight_avg, sh); \
+              qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
 }
 hl_motion_fn(0, 8);
 hl_motion_fn(1, 16);
@@ -796,16 +840,19 @@ static void free_tables(H264Context *h, int free_rbsp){
 }
 
 static void init_dequant8_coeff_table(H264Context *h){
-    int i,q,x;
+    int i,j,q,x;
     const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
-    h->dequant8_coeff[0] = h->dequant8_buffer[0];
-    h->dequant8_coeff[1] = h->dequant8_buffer[1];
 
-    for(i=0; i<2; i++ ){
-        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
-            h->dequant8_coeff[1] = h->dequant8_buffer[0];
-            break;
+    for(i=0; i<6; i++ ){
+        h->dequant8_coeff[i] = h->dequant8_buffer[i];
+        for(j=0; j<i; j++){
+            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
+                h->dequant8_coeff[i] = h->dequant8_buffer[j];
+                break;
+            }
         }
+        if(j<i)
+            continue;
 
         for(q=0; q<max_qp+1; q++){
             int shift = div6[q];
@@ -853,7 +900,7 @@ static void init_dequant_tables(H264Context *h){
             for(x=0; x<16; x++)
                 h->dequant4_coeff[i][0][x] = 1<<6;
         if(h->pps.transform_8x8_mode)
-            for(i=0; i<2; i++)
+            for(i=0; i<6; i++)
                 for(x=0; x<64; x++)
                     h->dequant8_coeff[i][0][x] = 1<<6;
     }
@@ -868,7 +915,7 @@ int ff_h264_alloc_tables(H264Context *h){
 
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)
 
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 32 * sizeof(uint8_t), fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
 
@@ -930,8 +977,8 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
  * Allocate buffers which are not shared amongst multiple threads.
  */
 static int context_init(H264Context *h){
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
 
     h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
     h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
@@ -1130,9 +1177,10 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex
 
         // frame_start may not be called for the next thread (if it's decoding a bottom field)
         // so this has to be allocated here
-        h->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
+        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);
 
         s->dsp.clear_blocks(h->mb);
+        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
     }
 
     //extradata/NAL handling
@@ -1151,7 +1199,7 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex
     for(i=0; i<6; i++)
         h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);
 
-    for(i=0; i<2; i++)
+    for(i=0; i<6; i++)
         h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);
 
     h->dequant_coeff_pps = h1->dequant_coeff_pps;
@@ -1206,20 +1254,20 @@ int ff_h264_frame_start(H264Context *h){
 
     for(i=0; i<16; i++){
         h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
-        h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
     }
-    for(i=0; i<4; i++){
+    for(i=0; i<16; i++){
         h->block_offset[16+i]=
-        h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
-        h->block_offset[24+16+i]=
-        h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[48+16+i]=
+        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
     }
 
     /* can't be in alloc_tables because linesize isn't known there.
      * FIXME: redo bipred weight to not require extra buffer? */
     for(i = 0; i < thread_count; i++)
         if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
-            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
+            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);
 
     /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
     memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
@@ -1404,7 +1452,7 @@ static void decode_postinit(H264Context *h, int setup_finished){
         ff_thread_finish_setup(s->avctx);
 }
 
-static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
+static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
     MpegEncContext * const s = &h->s;
     uint8_t *top_border;
     int top_idx = 1;
@@ -1422,12 +1470,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
                 if (pixel_shift)
                     AV_COPY128(top_border+16, src_y+15*linesize+16);
                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-                    if (pixel_shift) {
-                        AV_COPY128(top_border+32, src_cb+7*uvlinesize);
-                        AV_COPY128(top_border+48, src_cr+7*uvlinesize);
+                    if(chroma444){
+                        if (pixel_shift){
+                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
+                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
+                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
+                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
+                        } else {
+                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
+                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
+                        }
                     } else {
-                    AV_COPY64(top_border+16, src_cb+7*uvlinesize);
-                    AV_COPY64(top_border+24, src_cr+7*uvlinesize);
+                        if (pixel_shift) {
+                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
+                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
+                        } else {
+                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
+                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
+                        }
                     }
                 }
             }
@@ -1445,12 +1505,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
         AV_COPY128(top_border+16, src_y+16*linesize+16);
 
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-        if (pixel_shift) {
-            AV_COPY128(top_border+32, src_cb+8*uvlinesize);
-            AV_COPY128(top_border+48, src_cr+8*uvlinesize);
+        if(chroma444){
+            if (pixel_shift){
+                AV_COPY128(top_border+32, src_cb + 16*linesize);
+                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
+                AV_COPY128(top_border+64, src_cr + 16*linesize);
+                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
+            } else {
+                AV_COPY128(top_border+16, src_cb + 16*linesize);
+                AV_COPY128(top_border+32, src_cr + 16*linesize);
+            }
         } else {
-        AV_COPY64(top_border+16, src_cb+8*uvlinesize);
-        AV_COPY64(top_border+24, src_cr+8*uvlinesize);
+            if (pixel_shift) {
+                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
+                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
+            } else {
+                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
+                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
+            }
         }
     }
 }
@@ -1458,7 +1530,8 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                   uint8_t *src_cb, uint8_t *src_cr,
                                   int linesize, int uvlinesize,
-                                  int xchg, int simple, int pixel_shift){
+                                  int xchg, int chroma444,
+                                  int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
     int deblock_topleft;
     int deblock_top;
@@ -1513,13 +1586,28 @@ else      AV_COPY64(b,a);
         }
     }
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-        if(deblock_top){
+        if(chroma444){
             if(deblock_topleft){
-                XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
-                XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
+                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
+                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
+            }
+            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
+            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
+            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
+            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
+            if(s->mb_x+1 < s->mb_width){
+                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
+                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
+            }
+        } else {
+            if(deblock_top){
+                if(deblock_topleft){
+                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
+                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
+                }
+                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
+                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
             }
-            XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
-            XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
         }
     }
 }
@@ -1538,6 +1626,159 @@ static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int in
         AV_WN16A(mb + index, value);
 }
 
+/**
+ * Intra-predict plane p of the current macroblock. Intra 4x4/8x8 blocks get their residual
+ * added right after each block is predicted; for other types pred16x16 runs and only the
+ * plane's DC coefficients are prepared. p offsets block_offset by 16*p and h->mb by 256*p.
+ */
+static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
+                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
+{
+    MpegEncContext * const s = &h->s;
+    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+    int i;
+    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
+    block_offset += 16*p;
+    if(IS_INTRA4x4(mb_type)){
+        if(simple || !s->encoding){
+            if(IS_8x8DCT(mb_type)){
+                if(transform_bypass){
+                    idct_dc_add = idct_add = s->dsp.add_pixels8;
+                }else{
+                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
+                    idct_add    = h->h264dsp.h264_idct8_add;
+                }
+                for(i=0; i<16; i+=4){
+                    uint8_t * const ptr= dest_y + block_offset[i];
+                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
+                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                    }else{
+                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
+                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, (h->topright_samples_available<<i)&0x4000, linesize);
+                        if(nnz){
+                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
+                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                            else
+                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                        }
+                    }
+                }
+            }else{
+                if(transform_bypass){
+                    idct_dc_add = idct_add = s->dsp.add_pixels4;
+                }else{
+                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
+                    idct_add    = h->h264dsp.h264_idct_add;
+                }
+                for(i=0; i<16; i++){
+                    uint8_t * const ptr= dest_y + block_offset[i];
+                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
+                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                    }else{
+                        uint8_t *topright;
+                        int nnz, tr;
+                        uint64_t tr_high;
+                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
+                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
+                            assert(s->mb_y || linesize <= block_offset[i]); /* mb_y is not a local of this helper */
+                            if(!topright_avail){
+                                if (pixel_shift) {
+                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
+                                    topright= (uint8_t*) &tr_high;
+                                } else {
+                                    tr= ptr[3 - linesize]*0x01010101;
+                                    topright= (uint8_t*) &tr;
+                                }
+                            }else
+                                topright= ptr + (4 << pixel_shift) - linesize;
+                        }else
+                            topright= NULL;
+                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
+                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
+                        if(nnz){
+                            if(is_h264){
+                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
+                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                                else
+                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                            }else
+                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
+                        }
+                    }
+                }
+            }
+        }
+    }else{
+        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
+        if(is_h264){
+            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
+                if(!transform_bypass)
+                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
+                else{
+                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
+                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
+                    for(i = 0; i < 16; i++) /* plane offset scaled by pixel_shift, like the other h->mb accesses */
+                        dctcoef_set(h->mb+(p*256 << pixel_shift), pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
+                }
+            }
+        }else
+            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
+    }
+}
+
+/**
+ * Add the residual of plane p for macroblock types other than intra 4x4; the function
+ * does nothing when IS_INTRA4x4(mb_type) holds. p offsets block_offset by 16*p, h->mb by
+ * 256*p coefficients and selects that plane's non_zero_count_cache entries.
+ */
+static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
+                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
+{
+    MpegEncContext * const s = &h->s;
+    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
+    int i;
+    block_offset += 16*p;
+    if(!IS_INTRA4x4(mb_type)){
+        if(is_h264){
+            if(IS_INTRA16x16(mb_type)){
+                if(transform_bypass){
+                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
+                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
+                    }else{
+                        for(i=0; i<16; i++)
+                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256)) /* test plane p's coefficient, not plane 0's */
+                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
+                    }
+                }else
+                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
+            }else if(h->cbp&15){
+                if(transform_bypass){
+                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
+                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
+                    for(i=0; i<16; i+=di)
+                        if(h->non_zero_count_cache[ scan8[i+p*16] ])
+                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
+                }else{
+                    if(IS_8x8DCT(mb_type))
+                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
+                    else
+                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
+                }
+            }
+        }else{
+            for(i=0; i<16; i++){
+                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
+                    uint8_t * const ptr= dest_y + block_offset[i];
+                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
+                }
+            }
+        }
+    }
+}
+
 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mb_x= s->mb_x;
@@ -1546,13 +1787,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
     const int mb_type= s->current_picture.mb_type[mb_xy];
     uint8_t  *dest_y, *dest_cb, *dest_cr;
     int linesize, uvlinesize /*dct_offset*/;
-    int i;
+    int i, j;
     int *block_offset = &h->block_offset[0];
     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
     /* is_h264 should always be true if SVQ3 is disabled. */
     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
-    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
 
     dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
     dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
@@ -1566,7 +1806,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
     if (!simple && MB_FIELD) {
         linesize   = h->mb_linesize   = s->linesize * 2;
         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
-        block_offset = &h->block_offset[24];
+        block_offset = &h->block_offset[48];
         if(mb_y&1){ //FIXME move out of this function?
             dest_y -= s->linesize*15;
             dest_cb-= s->uvlinesize*7;
@@ -1629,194 +1869,67 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
     } else {
         if(IS_INTRA(mb_type)){
             if(h->deblocking_filter)
-                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift);
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);
 
             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
             }
 
-            if(IS_INTRA4x4(mb_type)){
-                if(simple || !s->encoding){
-                    if(IS_8x8DCT(mb_type)){
-                        if(transform_bypass){
-                            idct_dc_add =
-                            idct_add    = s->dsp.add_pixels8;
-                        }else{
-                            idct_dc_add = h->h264dsp.h264_idct8_dc_add;
-                            idct_add    = h->h264dsp.h264_idct8_add;
-                        }
-                        for(i=0; i<16; i+=4){
-                            uint8_t * const ptr= dest_y + block_offset[i];
-                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
-                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
-                                h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
-                            }else{
-                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
-                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
-                                                            (h->topright_samples_available<<i)&0x4000, linesize);
-                                if(nnz){
-                                    if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
-                                        idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
-                                    else
-                                        idct_add   (ptr, h->mb + (i*16 << pixel_shift), linesize);
-                                }
-                            }
-                        }
-                    }else{
-                        if(transform_bypass){
-                            idct_dc_add =
-                            idct_add    = s->dsp.add_pixels4;
-                        }else{
-                            idct_dc_add = h->h264dsp.h264_idct_dc_add;
-                            idct_add    = h->h264dsp.h264_idct_add;
-                        }
-                        for(i=0; i<16; i++){
-                            uint8_t * const ptr= dest_y + block_offset[i];
-                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
 
-                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
-                                h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
-                            }else{
-                                uint8_t *topright;
-                                int nnz, tr;
-                                uint64_t tr_high;
-                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
-                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
-                                    assert(mb_y || linesize <= block_offset[i]);
-                                    if(!topright_avail){
-                                        if (pixel_shift) {
-                                            tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
-                                            topright= (uint8_t*) &tr_high;
-                                        } else {
-                                        tr= ptr[3 - linesize]*0x01010101;
-                                        topright= (uint8_t*) &tr;
-                                        }
-                                    }else
-                                        topright= ptr + (4 << pixel_shift) - linesize;
-                                }else
-                                    topright= NULL;
-
-                                h->hpc.pred4x4[ dir ](ptr, topright, linesize);
-                                nnz = h->non_zero_count_cache[ scan8[i] ];
-                                if(nnz){
-                                    if(is_h264){
-                                        if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
-                                            idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
-                                        else
-                                            idct_add   (ptr, h->mb + (i*16 << pixel_shift), linesize);
-                                    }else
-                                        ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
-                                }
-                            }
-                        }
-                    }
-                }
-            }else{
-                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
-                if(is_h264){
-                    if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
-                        if(!transform_bypass)
-                            h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]);
-                        else{
-                            static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
-                                                                    8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
-                            for(i = 0; i < 16; i++)
-                                dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i));
-                        }
-                    }
-                }else
-                    ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
-            }
             if(h->deblocking_filter)
-                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
         }else if(is_h264){
             if (pixel_shift) {
                 hl_motion_16(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
-                             h->h264dsp.biweight_h264_pixels_tab);
+                             h->h264dsp.biweight_h264_pixels_tab, 0);
             } else
                 hl_motion_8(h, dest_y, dest_cb, dest_cr,
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
-                            h->h264dsp.biweight_h264_pixels_tab);
+                            h->h264dsp.biweight_h264_pixels_tab, 0);
         }
 
-
-        if(!IS_INTRA4x4(mb_type)){
-            if(is_h264){
-                if(IS_INTRA16x16(mb_type)){
-                    if(transform_bypass){
-                        if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
-                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
-                        }else{
-                            for(i=0; i<16; i++){
-                                if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
-                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
-                            }
-                        }
-                    }else{
-                         h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
-                    }
-                }else if(h->cbp&15){
-                    if(transform_bypass){
-                        const int di = IS_8x8DCT(mb_type) ? 4 : 1;
-                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
-                        for(i=0; i<16; i+=di){
-                            if(h->non_zero_count_cache[ scan8[i] ]){
-                                idct_add(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
-                            }
-                        }
-                    }else{
-                        if(IS_8x8DCT(mb_type)){
-                            h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
-                        }else{
-                            h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
-                        }
-                    }
-                }
-            }else{
-                for(i=0; i<16; i++){
-                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
-                        uint8_t * const ptr= dest_y + block_offset[i];
-                        ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
-                    }
-                }
-            }
-        }
+        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
 
         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
             uint8_t *dest[2] = {dest_cb, dest_cr};
             if(transform_bypass){
                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
-                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize);
-                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize);
+                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
+                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
                 }else{
                     idct_add = s->dsp.add_pixels4;
-                    for(i=16; i<16+8; i++){
-                        if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
-                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
+                    for(j=1; j<3; j++){
+                        for(i=j*16; i<j*16+4; i++){
+                            if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
+                                idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
+                        }
                     }
                 }
             }else{
                 if(is_h264){
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift)       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     h->h264dsp.h264_idct_add8(dest, block_offset,
                                               h->mb, uvlinesize,
                                               h->non_zero_count_cache);
                 }else{
-                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16     , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
-                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
-                    for(i=16; i<16+8; i++){
-                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                            uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
-                            ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
+                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                    for(j=1; j<3; j++){
+                        for(i=j*16; i<j*16+4; i++){
+                            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                                uint8_t * const ptr= dest[j-1] + block_offset[i];
+                                ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
+                            }
                         }
                     }
                 }
@@ -1824,7 +1937,112 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
         }
     }
     if(h->cbp || IS_INTRA(mb_type))
+    {
         s->dsp.clear_blocks(h->mb);
+        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
+    }
+}
+
+static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
+    MpegEncContext * const s = &h->s;
+    const int mb_x= s->mb_x;
+    const int mb_y= s->mb_y;
+    const int mb_xy= h->mb_xy;
+    const int mb_type= s->current_picture.mb_type[mb_xy];
+    uint8_t  *dest[3];
+    int linesize;
+    int i, j, p;
+    int *block_offset = &h->block_offset[0];
+    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
+
+    for (p = 0; p < 3; p++)
+    {
+        dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
+        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
+    }
+
+    h->list_counts[mb_xy]= h->list_count;
+
+    if (!simple && MB_FIELD) {
+        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
+        block_offset = &h->block_offset[48];
+        if(mb_y&1) //FIXME move out of this function?
+            for (p = 0; p < 3; p++)
+                dest[p] -= s->linesize*15;
+        if(FRAME_MBAFF) {
+            int list;
+            for(list=0; list<h->list_count; list++){
+                if(!USES_LIST(mb_type, list))
+                    continue;
+                if(IS_16X16(mb_type)){
+                    int8_t *ref = &h->ref_cache[list][scan8[0]];
+                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
+                }else{
+                    for(i=0; i<16; i+=4){
+                        int ref = h->ref_cache[list][scan8[i]];
+                        if(ref >= 0)
+                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
+                    }
+                }
+            }
+        }
+    } else {
+        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize;
+    }
+
+    if (!simple && IS_INTRA_PCM(mb_type)) {
+        if (pixel_shift) {
+            const int bit_depth = h->sps.bit_depth_luma;
+            GetBitContext gb;
+            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);
+
+            for (p = 0; p < 3; p++) {
+                for (i = 0; i < 16; i++) {
+                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
+                    for (j = 0; j < 16; j++)
+                        tmp[j] = get_bits(&gb, bit_depth);
+                }
+            }
+        } else {
+            for (p = 0; p < 3; p++) {
+                for (i = 0; i < 16; i++) {
+                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
+                }
+            }
+        }
+    } else {
+        if(IS_INTRA(mb_type)){
+            if(h->deblocking_filter)
+                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);
+
+            for (p = 0; p < 3; p++)
+                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
+
+            if(h->deblocking_filter)
+                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
+        }else{
+            if (pixel_shift) {
+                hl_motion_16(h, dest[0], dest[1], dest[2],
+                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                             h->h264dsp.weight_h264_pixels_tab,
+                             h->h264dsp.biweight_h264_pixels_tab, 1);
+            } else
+                hl_motion_8(h, dest[0], dest[1], dest[2],
+                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                            h->h264dsp.weight_h264_pixels_tab,
+                            h->h264dsp.biweight_h264_pixels_tab, 1);
+        }
+
+        for (p = 0; p < 3; p++)
+            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
+    }
+    if(h->cbp || IS_INTRA(mb_type))
+    {
+        s->dsp.clear_blocks(h->mb);
+        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
+    }
 }
 
 /**
@@ -1844,13 +2062,26 @@ static void av_noinline hl_decode_mb_complex(H264Context *h){
     hl_decode_mb_internal(h, 0, h->pixel_shift);
 }
 
+static void av_noinline hl_decode_mb_444_complex(H264Context *h){
+    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
+}
+
+static void av_noinline hl_decode_mb_444_simple(H264Context *h){
+    hl_decode_mb_444_internal(h, 1, 0);
+}
+
 void ff_h264_hl_decode_mb(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
 
-    if (is_complex) {
+    if (CHROMA444) {
+        if(is_complex || h->pixel_shift)
+            hl_decode_mb_444_complex(h);
+        else
+            hl_decode_mb_444_simple(h);
+    } else if (is_complex) {
         hl_decode_mb_complex(h);
     } else if (h->pixel_shift) {
         hl_decode_mb_simple_16(h);
@@ -1866,7 +2097,7 @@ static int pred_weight_table(H264Context *h){
     h->use_weight= 0;
     h->use_weight_chroma= 0;
     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
-    if(CHROMA)
+    if(h->sps.chroma_format_idc)
         h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
     luma_def = 1<<h->luma_log2_weight_denom;
     chroma_def = 1<<h->chroma_log2_weight_denom;
@@ -1891,7 +2122,7 @@ static int pred_weight_table(H264Context *h){
                 h->luma_weight[i][list][1]= 0;
             }
 
-            if(CHROMA){
+            if(h->sps.chroma_format_idc){
                 chroma_weight_flag= get_bits1(&s->gb);
                 if(chroma_weight_flag){
                     int j;
@@ -2321,11 +2552,11 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
 
     h->b_stride=  s->mb_width*4;
 
-    s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
+    s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
     if(h->sps.frame_mbs_only_flag)
-        s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
+        s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
     else
-        s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 7);
+        s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
 
     if (s->context_initialized
         && (   s->width != s->avctx->width || s->height != s->avctx->height
@@ -2370,18 +2601,22 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
 
         switch (h->sps.bit_depth_luma) {
             case 9 :
-                s->avctx->pix_fmt = PIX_FMT_YUV420P9;
+                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
                 break;
             case 10 :
-                s->avctx->pix_fmt = PIX_FMT_YUV420P10;
+                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
                 break;
             default:
-        s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
-                                                 s->avctx->codec->pix_fmts ?
-                                                 s->avctx->codec->pix_fmts :
-                                                 s->avctx->color_range == AVCOL_RANGE_JPEG ?
-                                                 hwaccel_pixfmt_list_h264_jpeg_420 :
-                                                 ff_hwaccel_pixfmt_list_420);
+                if (CHROMA444){
+                    s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
+                }else{
+                    s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
+                                                             s->avctx->codec->pix_fmts ?
+                                                             s->avctx->codec->pix_fmts :
+                                                             s->avctx->color_range == AVCOL_RANGE_JPEG ?
+                                                             hwaccel_pixfmt_list_h264_jpeg_420 :
+                                                             ff_hwaccel_pixfmt_list_420);
+                }
         }
 
         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
@@ -2873,11 +3108,10 @@ static int fill_filter_caches(H264Context *h, int mb_type){
     if(IS_INTRA(mb_type))
         return 0;
 
-    AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
-    AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
-    AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
-    AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
-    AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
+    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
+    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
+    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
+    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);
 
     h->cbp= h->cbp_table[mb_xy];
 
@@ -2929,45 +3163,45 @@ static int fill_filter_caches(H264Context *h, int mb_type){
 */
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
-        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
+        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
     }
 
     if(left_type[0]){
-        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
-        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
-        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
-        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
+        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
+        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
+        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
+        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
     }
 
     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
     if(!CABAC && h->pps.transform_8x8_mode){
         if(IS_8x8DCT(top_type)){
             h->non_zero_count_cache[4+8*0]=
-            h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
+            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
             h->non_zero_count_cache[6+8*0]=
-            h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
+            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
         }
         if(IS_8x8DCT(left_type[0])){
             h->non_zero_count_cache[3+8*1]=
-            h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
+            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
         }
         if(IS_8x8DCT(left_type[1])){
             h->non_zero_count_cache[3+8*3]=
-            h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
+            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
         }
 
         if(IS_8x8DCT(mb_type)){
             h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
-            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
+            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
 
             h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
-            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
+            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
 
             h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
-            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
+            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
 
             h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
-            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
+            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
         }
     }
 
@@ -3041,8 +3275,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
                 s->mb_x= mb_x;
                 s->mb_y= mb_y;
                 dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
-                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
-                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
+                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
+                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                     //FIXME simplify above
 
                 if (MB_FIELD) {
@@ -3057,7 +3291,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
                     linesize   = h->mb_linesize   = s->linesize;
                     uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                 }
-                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
+                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
                 if(fill_filter_caches(h, mb_type))
                     continue;
                 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 8c4f1ab21a..3abf895010 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -39,9 +39,6 @@
 #define interlaced_dct interlaced_dct_is_a_bad_name
 #define mb_intra mb_intra_is_not_initialized_see_mb_type
 
-#define LUMA_DC_BLOCK_INDEX   24
-#define CHROMA_DC_BLOCK_INDEX 25
-
 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
 #define COEFF_TOKEN_VLC_BITS           8
 #define TOTAL_ZEROS_VLC_BITS           9
@@ -60,8 +57,6 @@
  * of progressive decoding by about 2%. */
 #define ALLOW_INTERLACE
 
-#define ALLOW_NOCHROMA
-
 #define FMO 0
 
 /**
@@ -85,16 +80,12 @@
 #endif
 #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
 
-#ifdef ALLOW_NOCHROMA
-#define CHROMA h->sps.chroma_format_idc
-#else
-#define CHROMA 1
-#endif
-
 #ifndef CABAC
 #define CABAC h->pps.cabac
 #endif
 
+#define CHROMA444 (h->sps.chroma_format_idc == 3)
+
 #define EXTENDED_SAR          255
 
 #define MB_TYPE_REF0       MB_TYPE_ACPRED //dirty but it fits in 16 bit
@@ -198,7 +189,7 @@ typedef struct SPS{
     int num_reorder_frames;
     int scaling_matrix_present;
     uint8_t scaling_matrix4[6][16];
-    uint8_t scaling_matrix8[2][64];
+    uint8_t scaling_matrix8[6][64];
     int nal_hrd_parameters_present_flag;
     int vcl_hrd_parameters_present_flag;
     int pic_struct_present_flag;
@@ -233,7 +224,7 @@ typedef struct PPS{
     int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
     int transform_8x8_mode;     ///< transform_8x8_mode_flag
     uint8_t scaling_matrix4[6][16];
-    uint8_t scaling_matrix8[2][64];
+    uint8_t scaling_matrix8[6][64];
     uint8_t chroma_qp_table[2][64];  ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
     int chroma_qp_diff;
 }PPS;
@@ -298,21 +289,15 @@ typedef struct H264Context{
     unsigned int top_samples_available;
     unsigned int topright_samples_available;
     unsigned int left_samples_available;
-    uint8_t (*top_borders[2])[(16+2*8)*2];
+    uint8_t (*top_borders[2])[(16*3)*2];
 
     /**
      * non zero coeff count cache.
      * is 64 if not available.
      */
-    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8];
+    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15*8];
 
-    /*
-    .UU.YYYY
-    .UU.YYYY
-    .vv.YYYY
-    .VV.YYYY
-    */
-    uint8_t (*non_zero_count)[32];
+    uint8_t (*non_zero_count)[48];
 
     /**
      * Motion vector cache.
@@ -336,7 +321,7 @@ typedef struct H264Context{
      * block_offset[ 0..23] for frame macroblocks
      * block_offset[24..47] for field macroblocks
      */
-    int block_offset[2*(16+8)];
+    int block_offset[2*(16*3)];
 
     uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
     uint32_t *mb2br_xy;
@@ -356,9 +341,9 @@ typedef struct H264Context{
     PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
 
     uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down?
-    uint32_t dequant8_buffer[2][QP_MAX_NUM+1][64];
+    uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64];
     uint32_t (*dequant4_coeff[6])[16];
-    uint32_t (*dequant8_coeff[2])[64];
+    uint32_t (*dequant8_coeff[6])[64];
 
     int slice_num;
     uint16_t *slice_table;     ///< slice_table_base + 2*mb_stride + 1
@@ -408,15 +393,15 @@ typedef struct H264Context{
     GetBitContext *intra_gb_ptr;
     GetBitContext *inter_gb_ptr;
 
-    DECLARE_ALIGNED(16, DCTELEM, mb)[16*24*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
-    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16*2];
+    DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; ///< as a dct coefficient is int32_t in high depth, we need to reserve twice the space.
+    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2];
     DCTELEM mb_padding[256*2];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
 
     /**
      * Cabac
      */
     CABACContext cabac;
-    uint8_t      cabac_state[460];
+    uint8_t      cabac_state[1024];
 
     /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
     uint16_t     *cbp_table;
@@ -721,27 +706,43 @@ o-o o-o
 */
 
 /* Scan8 organization:
- *   0 1 2 3 4 5 6 7
- * 0   u u y y y y y
- * 1 u U U y Y Y Y Y
- * 2 u U U y Y Y Y Y
- * 3   v v y Y Y Y Y
- * 4 v V V y Y Y Y Y
- * 5 v V V   DYDUDV
+ *    0 1 2 3 4 5 6 7
+ * 0  DY    y y y y y
+ * 1        y Y Y Y Y
+ * 2        y Y Y Y Y
+ * 3        y Y Y Y Y
+ * 4        y Y Y Y Y
+ * 5  DU    u u u u u
+ * 6        u U U U U
+ * 7        u U U U U
+ * 8        u U U U U
+ * 9        u U U U U
+ * 10 DV    v v v v v
+ * 11       v V V V V
+ * 12       v V V V V
+ * 13       v V V V V
+ * 14       v V V V V
  * DY/DU/DV are for luma/chroma DC.
  */
 
+#define LUMA_DC_BLOCK_INDEX   48
+#define CHROMA_DC_BLOCK_INDEX 49
+
 //This table must be here because scan8[constant] must be known at compiletime
-static const uint8_t scan8[16 + 2*4 + 3]={
- 4+1*8, 5+1*8, 4+2*8, 5+2*8,
- 6+1*8, 7+1*8, 6+2*8, 7+2*8,
- 4+3*8, 5+3*8, 4+4*8, 5+4*8,
- 6+3*8, 7+3*8, 6+4*8, 7+4*8,
- 1+1*8, 2+1*8,
- 1+2*8, 2+2*8,
- 1+4*8, 2+4*8,
- 1+5*8, 2+5*8,
- 4+5*8, 5+5*8, 6+5*8
+static const uint8_t scan8[16*3 + 3]={
+ 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
+ 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
+ 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
+ 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
+ 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
+ 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
+ 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
+ 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
+ 4+11*8, 5+11*8, 4+12*8, 5+12*8,
+ 6+11*8, 7+11*8, 6+12*8, 7+12*8,
+ 4+13*8, 5+13*8, 4+14*8, 5+14*8,
+ 6+13*8, 7+13*8, 6+14*8, 7+14*8,
+ 0+ 0*8, 0+ 5*8, 0+10*8
 };
 
 static av_always_inline uint32_t pack16to32(int a, int b){
@@ -773,11 +774,11 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     int topleft_xy, top_xy, topright_xy, left_xy[2];
-    static const uint8_t left_block_options[4][16]={
-        {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8},
-        {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},
-        {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8},
-        {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
+    static const uint8_t left_block_options[4][32]={
+        {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4},
+        {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4},
+        {0,0,1,1,7,10,7,10,3+0*4, 3+0*4, 3+1*4, 3+1*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4},
+        {0,2,0,2,7,10,7,10,3+0*4, 3+2*4, 3+0*4, 3+2*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4}
     };
 
     h->topleft_partition= -1;
@@ -947,32 +948,41 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 */
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
-        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
-            h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
-            h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];
-
-            h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8];
-            h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8];
-    }else {
-            h->non_zero_count_cache[1+8*0]=
-            h->non_zero_count_cache[2+8*0]=
-
-            h->non_zero_count_cache[1+8*3]=
-            h->non_zero_count_cache[2+8*3]=
-            AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040);
+        AV_COPY32(&h->non_zero_count_cache[4+8* 0], &h->non_zero_count[top_xy][4*3]);
+        if(CHROMA444){
+            AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 7]);
+            AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4*11]);
+        }else{
+            AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 5]);
+            AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4* 9]);
+        }
+    }else{
+        uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
+        AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
+        AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
+        AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
     }
 
     for (i=0; i<2; i++) {
         if(left_type[i]){
-            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
-            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
-                h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
-                h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
+            h->non_zero_count_cache[3+8* 1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
+            h->non_zero_count_cache[3+8* 2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
+            if(CHROMA444){
+                h->non_zero_count_cache[3+8* 6 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+4*4];
+                h->non_zero_count_cache[3+8* 7 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+4*4];
+                h->non_zero_count_cache[3+8*11 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+8*4];
+                h->non_zero_count_cache[3+8*12 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+8*4];
+            }else{
+                h->non_zero_count_cache[3+8* 6 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
+                h->non_zero_count_cache[3+8*11 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
+            }
         }else{
-                h->non_zero_count_cache[3+8*1 + 2*8*i]=
-                h->non_zero_count_cache[3+8*2 + 2*8*i]=
-                h->non_zero_count_cache[0+8*1 +   8*i]=
-                h->non_zero_count_cache[0+8*4 +   8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
+            h->non_zero_count_cache[3+8* 1 + 2*8*i]=
+            h->non_zero_count_cache[3+8* 2 + 2*8*i]=
+            h->non_zero_count_cache[3+8* 6 + 2*8*i]=
+            h->non_zero_count_cache[3+8* 7 + 2*8*i]=
+            h->non_zero_count_cache[3+8*11 + 2*8*i]=
+            h->non_zero_count_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
         }
     }
 
@@ -981,15 +991,15 @@ static void fill_decode_caches(H264Context *h, int mb_type){
         if(top_type) {
             h->top_cbp = h->cbp_table[top_xy];
         } else {
-            h->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
+            h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
         }
         // left_cbp
         if (left_type[0]) {
-            h->left_cbp = (h->cbp_table[left_xy[0]] & 0x1f0)
+            h->left_cbp =   (h->cbp_table[left_xy[0]] & 0x7F0)
                         |  ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2)
                         | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2);
         } else {
-            h->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
+            h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
         }
     }
     }
@@ -1190,11 +1200,21 @@ static inline int pred_intra_mode(H264Context *h, int n){
 static inline void write_back_non_zero_count(H264Context *h){
     const int mb_xy= h->mb_xy;
 
-    AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]);
-    AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]);
-    AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]);
-    AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]);
-    AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]);
+    AV_COPY32(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[4+8* 1]);
+    AV_COPY32(&h->non_zero_count[mb_xy][ 4], &h->non_zero_count_cache[4+8* 2]);
+    AV_COPY32(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[4+8* 3]);
+    AV_COPY32(&h->non_zero_count[mb_xy][12], &h->non_zero_count_cache[4+8* 4]);
+    AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[4+8* 6]);
+    AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8* 7]);
+    AV_COPY32(&h->non_zero_count[mb_xy][32], &h->non_zero_count_cache[4+8*11]);
+    AV_COPY32(&h->non_zero_count[mb_xy][36], &h->non_zero_count_cache[4+8*12]);
+
+    if(CHROMA444){
+        AV_COPY32(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[4+8* 8]);
+        AV_COPY32(&h->non_zero_count[mb_xy][28], &h->non_zero_count_cache[4+8* 9]);
+        AV_COPY32(&h->non_zero_count[mb_xy][40], &h->non_zero_count_cache[4+8*13]);
+        AV_COPY32(&h->non_zero_count[mb_xy][44], &h->non_zero_count_cache[4+8*14]);
+    }
 }
 
 static inline void write_back_motion(H264Context *h, int mb_type){
@@ -1267,8 +1287,7 @@ static void av_unused decode_mb_skip(H264Context *h){
     const int mb_xy= h->mb_xy;
     int mb_type=0;
 
-    memset(h->non_zero_count[mb_xy], 0, 32);
-    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
+    memset(h->non_zero_count[mb_xy], 0, 48);
 
     if(MB_FIELD)
         mb_type|= MB_TYPE_INTERLACED;
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 69af1e2ded..f30f4e1c9c 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -45,7 +45,7 @@
 
 /* Cabac pre state table */
 
-static const int8_t cabac_context_init_I[460][2] =
+static const int8_t cabac_context_init_I[1024][2] =
 {
     /* 0 - 10 */
     { 20, -15 }, {  2, 54 },  {  3,  74 }, { 20, -15 },
@@ -211,10 +211,153 @@ static const int8_t cabac_context_init_I[460][2] =
     { -10,  73 }, { -10,  70 }, { -10,  69 }, {  -5,  66 },
     {  -9,  64 }, {  -5,  58 }, {   2,  59 }, {  21, -10 },
     {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
-    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 }
+    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 },
+
+    /* 460 - 1023 */
+    { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
+    { -12,  63 }, {  -2,  68 }, { -15,  84 }, { -13, 104 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
+    { -12,  63 }, {  -2,  68 }, { -15,  84 }, { -13, 104 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    {  -7,  93 }, { -11,  87 }, {  -3,  77 }, {  -5,  71 },
+    {  -4,  63 }, {  -4,  68 }, { -12,  84 }, {  -7,  62 },
+    {  -7,  65 }, {   8,  61 }, {   5,  56 }, {  -2,  66 },
+    {   1,  64 }, {   0,  61 }, {  -2,  78 }, {   1,  50 },
+    {   7,  52 }, {  10,  35 }, {   0,  44 }, {  11,  38 },
+    {   1,  45 }, {   0,  46 }, {   5,  44 }, {  31,  17 },
+    {   1,  51 }, {   7,  50 }, {  28,  19 }, {  16,  33 },
+    {  14,  62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
+    { -13,  91 }, { -12,  94 }, { -10,  88 }, { -16,  84 },
+    { -10,  86 }, {  -7,  83 }, { -13,  87 }, { -19,  94 },
+    {   1,  70 }, {   0,  72 }, {  -5,  74 }, {  18,  59 },
+    {  -7,  93 }, { -11,  87 }, {  -3,  77 }, {  -5,  71 },
+    {  -4,  63 }, {  -4,  68 }, { -12,  84 }, {  -7,  62 },
+    {  -7,  65 }, {   8,  61 }, {   5,  56 }, {  -2,  66 },
+    {   1,  64 }, {   0,  61 }, {  -2,  78 }, {   1,  50 },
+    {   7,  52 }, {  10,  35 }, {   0,  44 }, {  11,  38 },
+    {   1,  45 }, {   0,  46 }, {   5,  44 }, {  31,  17 },
+    {   1,  51 }, {   7,  50 }, {  28,  19 }, {  16,  33 },
+    {  14,  62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
+    { -13,  91 }, { -12,  94 }, { -10,  88 }, { -16,  84 },
+    { -10,  86 }, {  -7,  83 }, { -13,  87 }, { -19,  94 },
+    {   1,  70 }, {   0,  72 }, {  -5,  74 }, {  18,  59 },
+    {  24,   0 }, {  15,   9 }, {   8,  25 }, {  13,  18 },
+    {  15,   9 }, {  13,  19 }, {  10,  37 }, {  12,  18 },
+    {   6,  29 }, {  20,  33 }, {  15,  30 }, {   4,  45 },
+    {   1,  58 }, {   0,  62 }, {   7,  61 }, {  12,  38 },
+    {  11,  45 }, {  15,  39 }, {  11,  42 }, {  13,  44 },
+    {  16,  45 }, {  12,  41 }, {  10,  49 }, {  30,  34 },
+    {  18,  42 }, {  10,  55 }, {  17,  51 }, {  17,  46 },
+    {   0,  89 }, {  26, -19 }, {  22, -17 }, {  26, -17 },
+    {  30, -25 }, {  28, -20 }, {  33, -23 }, {  37, -27 },
+    {  33, -23 }, {  40, -28 }, {  38, -17 }, {  33, -11 },
+    {  40, -15 }, {  41,  -6 }, {  38,   1 }, {  41,  17 },
+    {  24,   0 }, {  15,   9 }, {   8,  25 }, {  13,  18 },
+    {  15,   9 }, {  13,  19 }, {  10,  37 }, {  12,  18 },
+    {   6,  29 }, {  20,  33 }, {  15,  30 }, {   4,  45 },
+    {   1,  58 }, {   0,  62 }, {   7,  61 }, {  12,  38 },
+    {  11,  45 }, {  15,  39 }, {  11,  42 }, {  13,  44 },
+    {  16,  45 }, {  12,  41 }, {  10,  49 }, {  30,  34 },
+    {  18,  42 }, {  10,  55 }, {  17,  51 }, {  17,  46 },
+    {   0,  89 }, {  26, -19 }, {  22, -17 }, {  26, -17 },
+    {  30, -25 }, {  28, -20 }, {  33, -23 }, {  37, -27 },
+    {  33, -23 }, {  40, -28 }, {  38, -17 }, {  33, -11 },
+    {  40, -15 }, {  41,  -6 }, {  38,   1 }, {  41,  17 },
+    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
+    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
+    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
+    { -23,  68 }, { -24,  50 }, { -11,  74 }, { -14, 106 },
+    { -13,  97 }, { -15,  90 }, { -12,  90 }, { -18,  88 },
+    { -10,  73 }, {  -9,  79 }, { -14,  86 }, { -10,  73 },
+    { -10,  70 }, { -10,  69 }, {  -5,  66 }, {  -9,  64 },
+    {  -5,  58 }, {   2,  59 }, {  23, -13 }, {  26, -13 },
+    {  40, -15 }, {  49, -14 }, {  44,   3 }, {  45,   6 },
+    {  44,  34 }, {  33,  54 }, {  19,  82 }, {  21, -10 },
+    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
+    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 },
+    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
+    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
+    {   0,  68 }, {  -9,  92 }, { -17, 120 }, { -20, 112 },
+    { -18, 114 }, { -11,  85 }, { -15,  92 }, { -14,  89 },
+    { -26,  71 }, { -15,  81 }, { -14,  80 }, {   0,  68 },
+    { -14,  70 }, { -24,  56 }, { -23,  68 }, { -24,  50 },
+    { -11,  74 }, { -14, 106 }, { -13,  97 }, { -15,  90 },
+    { -12,  90 }, { -18,  88 }, { -10,  73 }, {  -9,  79 },
+    { -14,  86 }, { -10,  73 }, { -10,  70 }, { -10,  69 },
+    {  -5,  66 }, {  -9,  64 }, {  -5,  58 }, {   2,  59 },
+    {  23, -13 }, {  26, -13 }, {  40, -15 }, {  49, -14 },
+    {  44,   3 }, {  45,   6 }, {  44,  34 }, {  33,  54 },
+    {  19,  82 }, {  21, -10 }, {  24, -11 }, {  28,  -8 },
+    {  28,  -1 }, {  29,   3 }, {  29,   9 }, {  35,  20 },
+    {  29,  36 }, {  14,  67 }, {  -3,  75 }, {  -1,  23 },
+    {   1,  34 }, {   1,  43 }, {   0,  54 }, {  -2,  55 },
+    {   0,  61 }, {   1,  64 }, {   0,  68 }, {  -9,  92 },
+    {  -6,  93 }, {  -6,  84 }, {  -8,  79 }, {   0,  66 },
+    {  -1,  71 }, {   0,  62 }, {  -2,  60 }, {  -2,  59 },
+    {  -5,  75 }, {  -3,  62 }, {  -4,  58 }, {  -9,  66 },
+    {  -1,  79 }, {   0,  71 }, {   3,  68 }, {  10,  44 },
+    {  -7,  62 }, {  15,  36 }, {  14,  40 }, {  16,  27 },
+    {  12,  29 }, {   1,  44 }, {  20,  36 }, {  18,  32 },
+    {   5,  42 }, {   1,  48 }, {  10,  62 }, {  17,  46 },
+    {   9,  64 }, { -12, 104 }, { -11,  97 }, { -16,  96 },
+    {  -7,  88 }, {  -8,  85 }, {  -7,  85 }, {  -9,  85 },
+    { -13,  88 }, {   4,  66 }, {  -3,  77 }, {  -3,  76 },
+    {  -6,  76 }, {  10,  58 }, {  -1,  76 }, {  -1,  83 },
+    {  -6,  93 }, {  -6,  84 }, {  -8,  79 }, {   0,  66 },
+    {  -1,  71 }, {   0,  62 }, {  -2,  60 }, {  -2,  59 },
+    {  -5,  75 }, {  -3,  62 }, {  -4,  58 }, {  -9,  66 },
+    {  -1,  79 }, {   0,  71 }, {   3,  68 }, {  10,  44 },
+    {  -7,  62 }, {  15,  36 }, {  14,  40 }, {  16,  27 },
+    {  12,  29 }, {   1,  44 }, {  20,  36 }, {  18,  32 },
+    {   5,  42 }, {   1,  48 }, {  10,  62 }, {  17,  46 },
+    {   9,  64 }, { -12, 104 }, { -11,  97 }, { -16,  96 },
+    {  -7,  88 }, {  -8,  85 }, {  -7,  85 }, {  -9,  85 },
+    { -13,  88 }, {   4,  66 }, {  -3,  77 }, {  -3,  76 },
+    {  -6,  76 }, {  10,  58 }, {  -1,  76 }, {  -1,  83 },
+    {  15,   6 }, {   6,  19 }, {   7,  16 }, {  12,  14 },
+    {  18,  13 }, {  13,  11 }, {  13,  15 }, {  15,  16 },
+    {  12,  23 }, {  13,  23 }, {  15,  20 }, {  14,  26 },
+    {  14,  44 }, {  17,  40 }, {  17,  47 }, {  24,  17 },
+    {  21,  21 }, {  25,  22 }, {  31,  27 }, {  22,  29 },
+    {  19,  35 }, {  14,  50 }, {  10,  57 }, {   7,  63 },
+    {  -2,  77 }, {  -4,  82 }, {  -3,  94 }, {   9,  69 },
+    { -12, 109 }, {  36, -35 }, {  36, -34 }, {  32, -26 },
+    {  37, -30 }, {  44, -32 }, {  34, -18 }, {  34, -15 },
+    {  40, -15 }, {  33,  -7 }, {  35,  -5 }, {  33,   0 },
+    {  38,   2 }, {  33,  13 }, {  23,  35 }, {  13,  58 },
+    {  15,   6 }, {   6,  19 }, {   7,  16 }, {  12,  14 },
+    {  18,  13 }, {  13,  11 }, {  13,  15 }, {  15,  16 },
+    {  12,  23 }, {  13,  23 }, {  15,  20 }, {  14,  26 },
+    {  14,  44 }, {  17,  40 }, {  17,  47 }, {  24,  17 },
+    {  21,  21 }, {  25,  22 }, {  31,  27 }, {  22,  29 },
+    {  19,  35 }, {  14,  50 }, {  10,  57 }, {   7,  63 },
+    {  -2,  77 }, {  -4,  82 }, {  -3,  94 }, {   9,  69 },
+    { -12, 109 }, {  36, -35 }, {  36, -34 }, {  32, -26 },
+    {  37, -30 }, {  44, -32 }, {  34, -18 }, {  34, -15 },
+    {  40, -15 }, {  33,  -7 }, {  35,  -5 }, {  33,   0 },
+    {  38,   2 }, {  33,  13 }, {  23,  35 }, {  13,  58 },
+    {  -3,  71 }, {  -6,  42 }, {  -5,  50 }, {  -3,  54 },
+    {  -2,  62 }, {   0,  58 }, {   1,  63 }, {  -2,  72 },
+    {  -1,  74 }, {  -9,  91 }, {  -5,  67 }, {  -5,  27 },
+    {  -3,  39 }, {  -2,  44 }, {   0,  46 }, { -16,  64 },
+    {  -8,  68 }, { -10,  78 }, {  -6,  77 }, { -10,  86 },
+    { -12,  92 }, { -15,  55 }, { -10,  60 }, {  -6,  62 },
+    {  -4,  65 }, { -12,  73 }, {  -8,  76 }, {  -7,  80 },
+    {  -9,  88 }, { -17, 110 }, {  -3,  71 }, {  -6,  42 },
+    {  -5,  50 }, {  -3,  54 }, {  -2,  62 }, {   0,  58 },
+    {   1,  63 }, {  -2,  72 }, {  -1,  74 }, {  -9,  91 },
+    {  -5,  67 }, {  -5,  27 }, {  -3,  39 }, {  -2,  44 },
+    {   0,  46 }, { -16,  64 }, {  -8,  68 }, { -10,  78 },
+    {  -6,  77 }, { -10,  86 }, { -12,  92 }, { -15,  55 },
+    { -10,  60 }, {  -6,  62 }, {  -4,  65 }, { -12,  73 },
+    {  -8,  76 }, {  -7,  80 }, {  -9,  88 }, { -17, 110 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 }
 };
 
-static const int8_t cabac_context_init_PB[3][460][2] =
+static const int8_t cabac_context_init_PB[3][1024][2] =
 {
     /* i_cabac_init_idc == 0 */
     {
@@ -370,6 +513,149 @@ static const int8_t cabac_context_init_PB[3][460][2] =
         { -14,  66 }, {   0,  59 }, {   2,  59 }, {  21, -13 },
         {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
         {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
+
+        /* 460 - 1023 */
+        {  -7,  92 }, {  -5,  89 }, {  -7,  96 }, { -13, 108 },
+        {  -3,  46 }, {  -1,  65 }, {  -1,  57 }, {  -9,  93 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -7,  92 }, {  -5,  89 }, {  -7,  96 }, { -13, 108 },
+        {  -3,  46 }, {  -1,  65 }, {  -1,  57 }, {  -9,  93 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
+        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
+        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
+        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
+        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
+        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
+        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
+        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
+        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
+        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
+        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
+        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
+        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
+        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
+        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
+        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
+        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
+        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
+        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
+        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
+        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
+        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
+        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
+        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
+        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
+        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
+        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
+        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
+        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
+        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
+        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
+        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
+        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
+        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
+        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
+        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
+        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
+        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
+        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
+        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
+        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
+        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
+        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
+        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
+        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
+        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
+        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
+        { -16,  66 }, { -22,  65 }, { -20,  63 }, {  -5,  85 },
+        {  -6,  81 }, { -10,  77 }, {  -7,  81 }, { -17,  80 },
+        { -18,  73 }, {  -4,  74 }, { -10,  83 }, {  -9,  71 },
+        {  -9,  67 }, {  -1,  61 }, {  -8,  66 }, { -14,  66 },
+        {   0,  59 }, {   2,  59 }, {   9,  -2 }, {  26,  -9 },
+        {  33,  -9 }, {  39,  -7 }, {  41,  -2 }, {  45,   3 },
+        {  49,   9 }, {  45,  27 }, {  36,  59 }, {  21, -13 },
+        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
+        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
+        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
+        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
+        {  -8,  66 }, {  -8,  76 }, {  -4,  79 }, {  -7,  71 },
+        {  -5,  69 }, {  -9,  70 }, {  -8,  66 }, { -10,  68 },
+        { -19,  73 }, { -12,  69 }, { -16,  70 }, { -15,  67 },
+        { -20,  62 }, { -19,  70 }, { -16,  66 }, { -22,  65 },
+        { -20,  63 }, {  -5,  85 }, {  -6,  81 }, { -10,  77 },
+        {  -7,  81 }, { -17,  80 }, { -18,  73 }, {  -4,  74 },
+        { -10,  83 }, {  -9,  71 }, {  -9,  67 }, {  -1,  61 },
+        {  -8,  66 }, { -14,  66 }, {   0,  59 }, {   2,  59 },
+        {   9,  -2 }, {  26,  -9 }, {  33,  -9 }, {  39,  -7 },
+        {  41,  -2 }, {  45,   3 }, {  49,   9 }, {  45,  27 },
+        {  36,  59 }, {  21, -13 }, {  33, -14 }, {  39,  -7 },
+        {  46,  -2 }, {  51,   2 }, {  60,   6 }, {  61,  17 },
+        {  55,  34 }, {  42,  62 }, {  -6,  66 }, {  -7,  35 },
+        {  -7,  42 }, {  -8,  45 }, {  -5,  48 }, { -12,  56 },
+        {  -6,  60 }, {  -5,  62 }, {  -8,  66 }, {  -8,  76 },
+        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
+        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
+        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
+        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
+        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
+        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
+        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
+        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
+        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
+        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
+        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
+        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
+        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
+        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
+        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
+        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
+        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
+        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
+        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
+        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
+        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
+        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
+        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
+        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
+        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
+        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
+        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
+        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
+        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
+        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
+        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
+        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
+        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
+        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
+        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
+        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
+        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
+        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
+        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
+        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
+        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
+        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
+        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
+        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
+        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
+        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
+        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
+        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
+        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
+        {  -3,  74 }, { -10,  90 }, {  -6,  76 }, {  -2,  44 },
+        {   0,  45 }, {   0,  52 }, {  -3,  64 }, {  -2,  59 },
+        {  -4,  70 }, {  -4,  75 }, {  -8,  82 }, { -17, 102 },
+        {  -9,  77 }, {   3,  24 }, {   0,  42 }, {   0,  48 },
+        {   0,  55 }, {  -6,  59 }, {  -7,  71 }, { -12,  83 },
+        { -11,  87 }, { -30, 119 }, {   1,  58 }, {  -3,  29 },
+        {  -1,  36 }, {   1,  38 }, {   2,  43 }, {  -6,  55 },
+        {   0,  58 }, {   0,  64 }, {  -3,  74 }, { -10,  90 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 }
     },
 
     /* i_cabac_init_idc == 1 */
@@ -526,6 +812,149 @@ static const int8_t cabac_context_init_PB[3][460][2] =
         {  -9,  60 }, {   1,  54 }, {   2,  58 }, {  17, -10 },
         {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
         {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+
+        /* 460 - 1024 */
+        {   0,  80 }, {  -5,  89 }, {  -7,  94 }, {  -4,  92 },
+        {   0,  39 }, {   0,  65 }, { -15,  84 }, { -35, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        {   0,  80 }, {  -5,  89 }, {  -7,  94 }, {  -4,  92 },
+        {   0,  39 }, {   0,  65 }, { -15,  84 }, { -35, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
+        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
+        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
+        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
+        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
+        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
+        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
+        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
+        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
+        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
+        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
+        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
+        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
+        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
+        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
+        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
+        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
+        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
+        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
+        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
+        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
+        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
+        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
+        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
+        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
+        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
+        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
+        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
+        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
+        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
+        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
+        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
+        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
+        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
+        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
+        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
+        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
+        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
+        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
+        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
+        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
+        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
+        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
+        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
+        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
+        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
+        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
+        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  -3,  81 },
+        {  -3,  76 }, {  -7,  72 }, {  -6,  78 }, { -12,  72 },
+        { -14,  68 }, {  -3,  70 }, {  -6,  76 }, {  -5,  66 },
+        {  -5,  62 }, {   0,  57 }, {  -4,  61 }, {  -9,  60 },
+        {   1,  54 }, {   2,  58 }, {  17, -10 }, {  32, -13 },
+        {  42,  -9 }, {  49,  -5 }, {  53,   0 }, {  64,   3 },
+        {  68,  10 }, {  66,  27 }, {  47,  57 }, {  17, -10 },
+        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
+        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
+        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
+        {  -4,  67 }, {  -7,  82 }, {  -5,  85 }, {  -6,  81 },
+        { -10,  77 }, {  -7,  81 }, { -17,  80 }, { -18,  73 },
+        {  -4,  74 }, { -10,  83 }, {  -9,  71 }, {  -9,  67 },
+        {  -1,  61 }, {  -8,  66 }, { -14,  66 }, {   0,  59 },
+        {   2,  59 }, {  -3,  81 }, {  -3,  76 }, {  -7,  72 },
+        {  -6,  78 }, { -12,  72 }, { -14,  68 }, {  -3,  70 },
+        {  -6,  76 }, {  -5,  66 }, {  -5,  62 }, {   0,  57 },
+        {  -4,  61 }, {  -9,  60 }, {   1,  54 }, {   2,  58 },
+        {  17, -10 }, {  32, -13 }, {  42,  -9 }, {  49,  -5 },
+        {  53,   0 }, {  64,   3 }, {  68,  10 }, {  66,  27 },
+        {  47,  57 }, {  17, -10 }, {  32, -13 }, {  42,  -9 },
+        {  49,  -5 }, {  53,   0 }, {  64,   3 }, {  68,  10 },
+        {  66,  27 }, {  47,  57 }, {  -5,  71 }, {   0,  24 },
+        {  -1,  36 }, {  -2,  42 }, {  -2,  52 }, {  -9,  57 },
+        {  -6,  63 }, {  -4,  65 }, {  -4,  67 }, {  -7,  82 },
+        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
+        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
+        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
+        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
+        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
+        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
+        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
+        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
+        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
+        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
+        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
+        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
+        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
+        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
+        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
+        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
+        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
+        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
+        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
+        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
+        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
+        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
+        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
+        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
+        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
+        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
+        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
+        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
+        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
+        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
+        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
+        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
+        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
+        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
+        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
+        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
+        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
+        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
+        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
+        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
+        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
+        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
+        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
+        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
+        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
+        {  -5,  74 }, {  -9,  86 }, { -23, 112 }, { -15,  71 },
+        {  -7,  61 }, {   0,  53 }, {  -5,  66 }, { -11,  77 },
+        {  -9,  80 }, {  -9,  84 }, { -10,  87 }, { -34, 127 },
+        { -21, 101 }, {  -3,  39 }, {  -5,  53 }, {  -7,  61 },
+        { -11,  75 }, { -15,  77 }, { -17,  91 }, { -25, 107 },
+        { -25, 111 }, { -28, 122 }, { -11,  76 }, { -10,  44 },
+        { -10,  52 }, { -10,  57 }, {  -9,  58 }, { -16,  72 },
+        {  -7,  69 }, {  -4,  69 }, {  -5,  74 }, {  -9,  86 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 }
     },
 
     /* i_cabac_init_idc == 2 */
@@ -682,6 +1111,149 @@ static const int8_t cabac_context_init_PB[3][460][2] =
         { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
         {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
         {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+
+        /* 460 - 1024 */
+        {  11,  80 }, {   5,  76 }, {   2,  84 }, {   5,  78 },
+        {  -6,  55 }, {   4,  61 }, { -14,  83 }, { -37, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  11,  80 }, {   5,  76 }, {   2,  84 }, {   5,  78 },
+        {  -6,  55 }, {   4,  61 }, { -14,  83 }, { -37, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
+        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
+        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
+        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
+        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
+        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
+        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
+        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
+        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
+        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
+        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
+        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
+        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
+        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
+        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
+        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
+        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
+        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
+        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
+        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
+        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
+        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
+        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
+        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
+        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
+        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
+        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
+        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
+        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
+        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
+        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
+        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
+        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
+        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
+        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
+        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
+        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
+        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
+        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
+        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
+        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
+        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
+        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
+        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
+        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
+        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
+        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
+        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {  -3,  78 },
+        {  -8,  74 }, {  -9,  72 }, { -10,  72 }, { -18,  75 },
+        { -12,  71 }, { -11,  63 }, {  -5,  70 }, { -17,  75 },
+        { -14,  72 }, { -16,  67 }, {  -8,  53 }, { -14,  59 },
+        {  -9,  52 }, { -11,  68 }, {   9,  -2 }, {  30, -10 },
+        {  31,  -4 }, {  33,  -1 }, {  33,   7 }, {  31,  12 },
+        {  37,  23 }, {  31,  38 }, {  20,  64 }, {   9,  -2 },
+        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
+        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
+        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
+        {  -6,  68 }, { -10,  79 }, {  -3,  78 }, {  -8,  74 },
+        {  -9,  72 }, { -10,  72 }, { -18,  75 }, { -12,  71 },
+        { -11,  63 }, {  -5,  70 }, { -17,  75 }, { -14,  72 },
+        { -16,  67 }, {  -8,  53 }, { -14,  59 }, {  -9,  52 },
+        { -11,  68 }, {  -3,  78 }, {  -8,  74 }, {  -9,  72 },
+        { -10,  72 }, { -18,  75 }, { -12,  71 }, { -11,  63 },
+        {  -5,  70 }, { -17,  75 }, { -14,  72 }, { -16,  67 },
+        {  -8,  53 }, { -14,  59 }, {  -9,  52 }, { -11,  68 },
+        {   9,  -2 }, {  30, -10 }, {  31,  -4 }, {  33,  -1 },
+        {  33,   7 }, {  31,  12 }, {  37,  23 }, {  31,  38 },
+        {  20,  64 }, {   9,  -2 }, {  30, -10 }, {  31,  -4 },
+        {  33,  -1 }, {  33,   7 }, {  31,  12 }, {  37,  23 },
+        {  31,  38 }, {  20,  64 }, {  -9,  71 }, {  -7,  37 },
+        {  -8,  44 }, { -11,  49 }, { -10,  56 }, { -12,  59 },
+        {  -8,  63 }, {  -9,  67 }, {  -6,  68 }, { -10,  79 },
+        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
+        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
+        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
+        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
+        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
+        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
+        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
+        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
+        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
+        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
+        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
+        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
+        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
+        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
+        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
+        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
+        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
+        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
+        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
+        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
+        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
+        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
+        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
+        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
+        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
+        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
+        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
+        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
+        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
+        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
+        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
+        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
+        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
+        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
+        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
+        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
+        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
+        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
+        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
+        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
+        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
+        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
+        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
+        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
+        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
+        { -12,  92 }, { -18, 108 }, { -24, 115 }, { -22,  82 },
+        {  -9,  62 }, {   0,  53 }, {   0,  59 }, { -14,  85 },
+        { -13,  89 }, { -13,  94 }, { -11,  92 }, { -29, 127 },
+        { -21, 100 }, { -14,  57 }, { -12,  67 }, { -11,  71 },
+        { -10,  77 }, { -21,  85 }, { -16,  88 }, { -23, 104 },
+        { -15,  98 }, { -37, 127 }, { -10,  82 }, {  -8,  48 },
+        {  -8,  61 }, {  -8,  66 }, {  -7,  70 }, { -14,  75 },
+        { -10,  79 }, {  -9,  83 }, { -12,  92 }, { -18, 108 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 }
     }
 };
 
@@ -695,7 +1267,7 @@ void ff_h264_init_cabac_states(H264Context *h) {
     else                                 tab = cabac_context_init_PB[h->cabac_init_idc];
 
     /* calculate pre-state */
-    for( i= 0; i < 460; i++ ) {
+    for( i= 0; i < 1024; i++ ) {
         int pre = 2*(((tab[i][0] * slice_qp) >>4 ) + tab[i][1]) - 127;
 
         pre^= pre>>31;
@@ -957,21 +1529,22 @@ static int decode_cabac_mb_mvd( H264Context *h, int ctxbase, int amvd, int *mvda
     my += decode_cabac_mb_mvd( h, 47, amvd1, &mpy );\
 }
 
-static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
+static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int max_coeff, int is_dc ) {
     int nza, nzb;
     int ctx = 0;
+    static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020};
 
     if( is_dc ) {
-        if( cat == 0 ) {
-            nza = h->left_cbp&0x100;
-            nzb = h-> top_cbp&0x100;
-        } else {
+        if( cat == 3 ) {
             idx -= CHROMA_DC_BLOCK_INDEX;
             nza = (h->left_cbp>>(6+idx))&0x01;
             nzb = (h-> top_cbp>>(6+idx))&0x01;
+        } else {
+            idx -= LUMA_DC_BLOCK_INDEX;
+            nza = h->left_cbp&(0x100<<idx);
+            nzb = h-> top_cbp&(0x100<<idx);
         }
     } else {
-        assert(cat == 1 || cat == 2 || cat == 4);
         nza = h->non_zero_count_cache[scan8[idx] - 1];
         nzb = h->non_zero_count_cache[scan8[idx] - 8];
     }
@@ -982,7 +1555,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
     if( nzb > 0 )
         ctx += 2;
 
-    return ctx + 4 * cat;
+    return base_ctx[cat] + ctx;
 }
 
 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
@@ -993,16 +1566,16 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
 };
 
 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
-    static const int significant_coeff_flag_offset[2][6] = {
-      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
-      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
+    static const int significant_coeff_flag_offset[2][14] = {
+      { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
+      { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
     };
-    static const int last_coeff_flag_offset[2][6] = {
-      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
-      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
+    static const int last_coeff_flag_offset[2][14] = {
+      { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748 },
+      { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757 }
     };
-    static const int coeff_abs_level_m1_offset[6] = {
-        227+0, 227+10, 227+20, 227+30, 227+39, 426
+    static const int coeff_abs_level_m1_offset[14] = {
+        227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
     };
     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
@@ -1057,7 +1630,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
     abs_level_m1_ctx_base = h->cabac_state
         + coeff_abs_level_m1_offset[cat];
 
-    if( !is_dc && cat == 5 ) {
+    if( !is_dc && max_coeff == 64 ) {
 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
         for(last= 0; last < coefs; last++) { \
             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
@@ -1075,9 +1648,11 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
         }
         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
-        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
+        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
+                                                 last_coeff_ctx_base-significant_coeff_ctx_base, sig_off);
     } else {
-        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
+        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
+                                             last_coeff_ctx_base-significant_coeff_ctx_base);
 #else
         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
     } else {
@@ -1087,16 +1662,16 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
     assert(coeff_count > 0);
 
     if( is_dc ) {
-        if( cat == 0 )
-            h->cbp_table[h->mb_xy] |= 0x100;
-        else
+        if( cat == 3 )
             h->cbp_table[h->mb_xy] |= 0x40 << (n - CHROMA_DC_BLOCK_INDEX);
+        else
+            h->cbp_table[h->mb_xy] |= 0x100 << (n - LUMA_DC_BLOCK_INDEX);
         h->non_zero_count_cache[scan8[n]] = coeff_count;
     } else {
-        if( cat == 5 )
+        if( max_coeff == 64 )
             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
         else {
-            assert( cat == 1 || cat == 2 || cat == 4 );
+            assert( cat == 1 || cat ==  2 || cat ==  4 || cat == 7 || cat == 8 || cat == 11 || cat == 12 );
             h->non_zero_count_cache[scan8[n]] = coeff_count;
         }
     }
@@ -1179,7 +1754,7 @@ static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block
 
 static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
     /* read coded block flag */
-    if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, 1 ) ] ) == 0 ) {
+    if( get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 1 ) ] ) == 0 ) {
         h->non_zero_count_cache[scan8[n]] = 0;
         return;
     }
@@ -1188,13 +1763,68 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *
 
 static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
     /* read coded block flag */
-    if( cat != 5 && get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, 0 ) ] ) == 0 ) {
-        h->non_zero_count_cache[scan8[n]] = 0;
+    if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
+        if( max_coeff == 64 ) {
+            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 0, 1);
+        } else {
+            h->non_zero_count_cache[scan8[n]] = 0;
+        }
         return;
     }
     decode_cabac_residual_nondc_internal( h, block, cat, n, scantable, qmul, max_coeff );
 }
 
+static av_always_inline void decode_cabac_luma_residual( H264Context *h, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p )
+{ /* Decodes the CABAC residual blocks of plane index p; mb_type selects the Intra16x16, 8x8-transform or 4x4 path. */
+    static const uint8_t ctx_cat[4][3] = {{0,6,10},{1,7,11},{2,8,12},{5,9,13}}; /* [block kind][p]; rows: DC, Intra16x16 AC, 4x4, 8x8 */
+    const uint32_t *qmul;
+    int i8x8, i4x4;
+    MpegEncContext * const s = &h->s;
+    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1]; /* plane 0: s->qscale; planes 1/2: chroma_qp[0]/[1] */
+    if( IS_INTRA16x16( mb_type ) ) {
+        //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
+        AV_ZERO128(h->mb_luma_dc[p]+0); /* reset this plane's DC buffer (AV_ZERO128 x4; presumably covers all of it -- confirm element size) */
+        AV_ZERO128(h->mb_luma_dc[p]+8);
+        AV_ZERO128(h->mb_luma_dc[p]+16);
+        AV_ZERO128(h->mb_luma_dc[p]+24);
+        decode_cabac_residual_dc(h, h->mb_luma_dc[p], ctx_cat[0][p], LUMA_DC_BLOCK_INDEX+p, scan, 16); /* DC block, max_coeff 16 */
+
+        if( cbp&15 ) { /* low four cbp bits (one per 8x8 block, as tested below); any set: decode all 16 AC blocks */
+            qmul = h->dequant4_coeff[p][qscale];
+            for( i4x4 = 0; i4x4 < 16; i4x4++ ) {
+                const int index = 16*p + i4x4; /* each plane owns 16 consecutive block indices */
+                //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", index );
+                decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[1][p], index, scan + 1, qmul, 15); /* AC only: scan starts one entry in, 15 coefficients */
+            }
+        } else {
+            fill_rectangle(&h->non_zero_count_cache[scan8[16*p]], 4, 4, 8, 0, 1); /* no AC coded: store 0 over a 4x4 area of the nnz cache (row stride 8, presumably) */
+        }
+    } else {
+        int cqm = (IS_INTRA( mb_type ) ? 0:3) + p; /* dequant table index: 0..2 for intra, 3..5 otherwise, offset by plane */
+        for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
+            if( cbp & (1<<i8x8) ) { /* bit i8x8 of cbp set: this 8x8 block has residual to decode */
+                if( IS_8x8DCT(mb_type) ) {
+                    const int index = 16*p + 4*i8x8;
+                    decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[3][p], index,
+                                                scan8x8, h->dequant8_coeff[cqm][qscale], 64); /* one block, max_coeff 64 */
+                } else {
+                    qmul = h->dequant4_coeff[cqm][qscale];
+                    for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
+                        const int index = 16*p + 4*i8x8 + i4x4;
+                        //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
+//START_TIMER
+                        decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[2][p], index, scan, qmul, 16); /* full 4x4 block, max_coeff 16 */
+//STOP_TIMER("decode_residual")
+                    }
+                }
+            } else {
+                uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+16*p] ];
+                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; /* uncoded 8x8: zero its four cache entries (offsets 0, 1, 8, 9) */
+            }
+        }
+    }
+}
+
 /**
  * decodes a macroblock
  * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
@@ -1204,6 +1834,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
     int mb_xy;
     int mb_type, partition_count, cbp = 0;
     int dct8x8_allowed= h->pps.transform_8x8_mode;
+    int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -1313,7 +1944,8 @@ decode_intra_mb:
     h->slice_table[ mb_xy ]= h->slice_num;
 
     if(IS_INTRA_PCM(mb_type)) {
-        const int mb_size = (384*h->sps.bit_depth_luma) >> 3;
+        static const uint16_t mb_sizes[4] = {256,384,512,768};
+        const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
         const uint8_t *ptr;
 
         // We assume these blocks are very rare so we do not optimize it.
@@ -1326,20 +1958,17 @@ decode_intra_mb:
         }
 
         // The pixels are stored in the same order as levels in h->mb array.
-        memcpy(h->mb, ptr, 2*mb_size/3); ptr+=2*mb_size/3;
-        if(CHROMA){
-            memcpy(h->mb+mb_size/3, ptr, mb_size/3); ptr+=mb_size/3;
-        }
+        memcpy(h->mb, ptr, mb_size); ptr+=mb_size;
 
         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
 
         // All blocks are present
-        h->cbp_table[mb_xy] = 0x1ef;
+        h->cbp_table[mb_xy] = 0xf7ef;
         h->chroma_pred_mode_table[mb_xy] = 0;
         // In deblocking, the quantizer is 0
         s->current_picture.qscale_table[mb_xy]= 0;
         // All coeffs are present
-        memset(h->non_zero_count[mb_xy], 16, 32);
+        memset(h->non_zero_count[mb_xy], 16, 48);
         s->current_picture.mb_type[mb_xy]= mb_type;
         h->last_qscale_diff = 0;
         return 0;
@@ -1376,7 +2005,7 @@ decode_intra_mb:
             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode( h, h->intra16x16_pred_mode );
             if( h->intra16x16_pred_mode < 0 ) return -1;
         }
-        if(CHROMA){
+        if(decode_chroma){
             h->chroma_pred_mode_table[mb_xy] =
             pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
 
@@ -1605,7 +2234,7 @@ decode_intra_mb:
 
     if( !IS_INTRA16x16( mb_type ) ) {
         cbp  = decode_cabac_mb_cbp_luma( h );
-        if(CHROMA)
+        if(decode_chroma)
             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
     }
 
@@ -1614,6 +2243,28 @@ decode_intra_mb:
     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
         mb_type |= MB_TYPE_8x8DCT * get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
     }
+
+    /* It would be better to do this in fill_decode_caches, but we don't know
+     * the transform mode of the current macroblock there. */
+    if (CHROMA444 && IS_8x8DCT(mb_type)){
+        int i;
+        for (i = 0; i < 2; i++){
+            if (h->left_type[i] && !IS_8x8DCT(h->left_type[i])){
+                h->non_zero_count_cache[3+8* 1 + 2*8*i]=
+                h->non_zero_count_cache[3+8* 2 + 2*8*i]=
+                h->non_zero_count_cache[3+8* 6 + 2*8*i]=
+                h->non_zero_count_cache[3+8* 7 + 2*8*i]=
+                h->non_zero_count_cache[3+8*11 + 2*8*i]=
+                h->non_zero_count_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+            }
+        }
+        if (h->top_type && !IS_8x8DCT(h->top_type)){
+            uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
+            AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
+            AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
+            AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
+        }
+    }
     s->current_picture.mb_type[mb_xy]= mb_type;
 
     if( cbp || IS_INTRA16x16( mb_type ) ) {
@@ -1658,76 +2309,38 @@ decode_intra_mb:
         }else
             h->last_qscale_diff=0;
 
-        if( IS_INTRA16x16( mb_type ) ) {
-            int i;
-            //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
-            AV_ZERO128(h->mb_luma_dc+0);
-            AV_ZERO128(h->mb_luma_dc+8);
-            AV_ZERO128(h->mb_luma_dc+16);
-            AV_ZERO128(h->mb_luma_dc+24);
-            decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
+        decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 0);
+        if(CHROMA444){
+            decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
+            decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2);
+        } else {
+            if( cbp&0x30 ){
+                int c;
+                for( c = 0; c < 2; c++ ) {
+                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
+                    decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
+                }
+            }
 
-            if( cbp&15 ) {
-                qmul = h->dequant4_coeff[0][s->qscale];
-                for( i = 0; i < 16; i++ ) {
-                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
-                    decode_cabac_residual_nondc(h, h->mb + (16*i << pixel_shift), 1, i, scan + 1, qmul, 15);
+            if( cbp&0x20 ) {
+                int c, i;
+                for( c = 0; c < 2; c++ ) {
+                    qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
+                    for( i = 0; i < 4; i++ ) {
+                        const int index = 16 + 16 * c + i;
+                        //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
+                        decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
+                    }
                 }
             } else {
-                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
             }
-        } else {
-            int i8x8, i4x4;
-            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
-                if( cbp & (1<<i8x8) ) {
-                    if( IS_8x8DCT(mb_type) ) {
-                        decode_cabac_residual_nondc(h, h->mb + (64*i8x8 << pixel_shift), 5, 4*i8x8,
-                            scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
-                    } else {
-                        qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
-                        for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
-                            const int index = 4*i8x8 + i4x4;
-                            //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
-//START_TIMER
-                            decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 2, index, scan, qmul, 16);
-//STOP_TIMER("decode_residual")
-                        }
-                    }
-                } else {
-                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
-                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
-                }
-            }
-        }
-
-        if( cbp&0x30 ){
-            int c;
-            for( c = 0; c < 2; c++ ) {
-                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
-                decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
-            }
-        }
-
-        if( cbp&0x20 ) {
-            int c, i;
-            for( c = 0; c < 2; c++ ) {
-                qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
-                for( i = 0; i < 4; i++ ) {
-                    const int index = 16 + 4 * c + i;
-                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
-                    decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
-                }
-            }
-        } else {
-            uint8_t * const nnz= &h->non_zero_count_cache[0];
-            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
-            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
         }
     } else {
-        uint8_t * const nnz= &h->non_zero_count_cache[0];
-        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
-        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
-        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
+        fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
+        fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
         h->last_qscale_diff = 0;
     }
 
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 2e5ea54679..497166b423 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -371,12 +371,12 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
 
     //FIXME put trailing_onex into the context
 
-    if(n >= CHROMA_DC_BLOCK_INDEX){
+    if(max_coeff <= 8){
         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
         total_coeff= coeff_token>>2;
     }else{
-        if(n == LUMA_DC_BLOCK_INDEX){
-            total_coeff= pred_non_zero_count(h, 0);
+        if(n >= LUMA_DC_BLOCK_INDEX){
+            total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
             total_coeff= coeff_token>>2;
         }else{
@@ -482,7 +482,8 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
     if(total_coeff == max_coeff)
         zeros_left=0;
     else{
-        if(n >= CHROMA_DC_BLOCK_INDEX)
+        /* FIXME: we don't actually support 4:2:2 yet. */
+        if(max_coeff <= 8)
             zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
         else
             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
@@ -536,12 +537,80 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
     return 0;
 }
 
+static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
+    int i4x4, i8x8;
+    MpegEncContext * const s = &h->s;
+    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
+    if(IS_INTRA16x16(mb_type)){
+        AV_ZERO128(h->mb_luma_dc[p]+0);
+        AV_ZERO128(h->mb_luma_dc[p]+8);
+        AV_ZERO128(h->mb_luma_dc[p]+16);
+        AV_ZERO128(h->mb_luma_dc[p]+24);
+        if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
+            return -1; //FIXME continue if partitioned and other return -1 too
+        }
+
+        assert((cbp&15) == 0 || (cbp&15) == 15);
+
+        if(cbp&15){
+            for(i8x8=0; i8x8<4; i8x8++){
+                for(i4x4=0; i4x4<4; i4x4++){
+                    const int index= i4x4 + 4*i8x8 + p*16;
+                    if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
+                        index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
+                        return -1;
+                    }
+                }
+            }
+            return 0xf;
+        }else{
+            fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
+            return 0;
+        }
+    }else{
+        int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
+        /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
+        int new_cbp = 0;
+        for(i8x8=0; i8x8<4; i8x8++){
+            if(cbp & (1<<i8x8)){
+                if(IS_8x8DCT(mb_type)){
+                    DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
+                    uint8_t *nnz;
+                    for(i4x4=0; i4x4<4; i4x4++){
+                        const int index= i4x4 + 4*i8x8 + p*16;
+                        if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
+                                            h->dequant8_coeff[cqm][qscale], 16) < 0 )
+                            return -1;
+                    }
+                    nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
+                    nnz[0] += nnz[1] + nnz[8] + nnz[9];
+                    new_cbp |= !!nnz[0] << i8x8;
+                }else{
+                    for(i4x4=0; i4x4<4; i4x4++){
+                        const int index= i4x4 + 4*i8x8 + p*16;
+                        if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
+                                            scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
+                            return -1;
+                        }
+                        new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
+                    }
+                }
+            }else{
+                uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
+                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+            }
+        }
+        return new_cbp;
+    }
+}
+
 int ff_h264_decode_mb_cavlc(H264Context *h){
     MpegEncContext * const s = &h->s;
     int mb_xy;
     int partition_count;
     unsigned int mb_type, cbp;
     int dct8x8_allowed= h->pps.transform_8x8_mode;
+    int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -608,19 +677,21 @@ decode_intra_mb:
 
     if(IS_INTRA_PCM(mb_type)){
         unsigned int x;
+        static const uint16_t mb_sizes[4] = {256,384,512,768};
+        const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
 
         // We assume these blocks are very rare so we do not optimize it.
         align_get_bits(&s->gb);
 
         // The pixels are stored in the same order as levels in h->mb array.
-        for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
+        for(x=0; x < mb_size; x++){
             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
         }
 
         // In deblocking, the quantizer is 0
         s->current_picture.qscale_table[mb_xy]= 0;
         // All coeffs are present
-        memset(h->non_zero_count[mb_xy], 16, 32);
+        memset(h->non_zero_count[mb_xy], 16, 48);
 
         s->current_picture.mb_type[mb_xy]= mb_type;
         return 0;
@@ -668,7 +739,7 @@ decode_intra_mb:
             if(h->intra16x16_pred_mode < 0)
                 return -1;
         }
-        if(CHROMA){
+        if(decode_chroma){
             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
             if(pred_mode < 0)
                 return -1;
@@ -896,15 +967,19 @@ decode_intra_mb:
 
     if(!IS_INTRA16x16(mb_type)){
         cbp= get_ue_golomb(&s->gb);
-        if(cbp > 47){
-            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
-            return -1;
-        }
 
-        if(CHROMA){
+        if(decode_chroma){
+            if(cbp > 47){
+                av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
+                return -1;
+            }
             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
             else                     cbp= golomb_to_inter_cbp   [cbp];
         }else{
+            if(cbp > 15){
+                av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
+                return -1;
+            }
             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
             else                     cbp= golomb_to_inter_cbp_gray[cbp];
         }
@@ -918,8 +993,9 @@ decode_intra_mb:
     s->current_picture.mb_type[mb_xy]= mb_type;
 
     if(cbp || IS_INTRA16x16(mb_type)){
-        int i8x8, i4x4, chroma_idx;
+        int i4x4, chroma_idx;
         int dquant;
+        int ret;
         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
         const uint8_t *scan, *scan8x8;
         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
@@ -947,85 +1023,45 @@ decode_intra_mb:
 
         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
-        if(IS_INTRA16x16(mb_type)){
-            AV_ZERO128(h->mb_luma_dc+0);
-            AV_ZERO128(h->mb_luma_dc+8);
-            AV_ZERO128(h->mb_luma_dc+16);
-            AV_ZERO128(h->mb_luma_dc+24);
-            if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
-                return -1; //FIXME continue if partitioned and other return -1 too
+
+        if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
+            return -1;
+        }
+        h->cbp_table[mb_xy] |= ret << 12;
+        if(CHROMA444){
+            if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
+                return -1;
+            }
+            if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
+                return -1;
+            }
+        } else {
+            if(cbp&0x30){
+                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
+                    if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
+                        return -1;
+                    }
             }
 
-            assert((cbp&15) == 0 || (cbp&15) == 15);
-
-            if(cbp&15){
-                for(i8x8=0; i8x8<4; i8x8++){
+            if(cbp&0x20){
+                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
+                    const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
                     for(i4x4=0; i4x4<4; i4x4++){
-                        const int index= i4x4 + 4*i8x8;
-                        if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
+                        const int index= 16 + 16*chroma_idx + i4x4;
+                        if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
                             return -1;
                         }
                     }
                 }
             }else{
-                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
             }
-        }else{
-            for(i8x8=0; i8x8<4; i8x8++){
-                if(cbp & (1<<i8x8)){
-                    if(IS_8x8DCT(mb_type)){
-                        DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift];
-                        uint8_t *nnz;
-                        for(i4x4=0; i4x4<4; i4x4++){
-                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
-                                                h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
-                                return -1;
-                        }
-                        nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
-                        nnz[0] += nnz[1] + nnz[8] + nnz[9];
-                    }else{
-                        for(i4x4=0; i4x4<4; i4x4++){
-                            const int index= i4x4 + 4*i8x8;
-
-                            if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
-                                return -1;
-                            }
-                        }
-                    }
-                }else{
-                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
-                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
-                }
-            }
-        }
-
-        if(cbp&0x30){
-            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
-                    return -1;
-                }
-        }
-
-        if(cbp&0x20){
-            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
-                const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
-                for(i4x4=0; i4x4<4; i4x4++){
-                    const int index= 16 + 4*chroma_idx + i4x4;
-                    if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
-                        return -1;
-                    }
-                }
-            }
-        }else{
-            uint8_t * const nnz= &h->non_zero_count_cache[0];
-            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
-            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
         }
     }else{
-        uint8_t * const nnz= &h->non_zero_count_cache[0];
-        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
-        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
-        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
+        fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
+        fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
     }
     s->current_picture.qscale_table[mb_xy]= s->qscale;
     write_back_non_zero_count(h);
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 72b1905936..d4ecefcf08 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -220,7 +220,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
 
     mb_xy = h->mb_xy;
 
-    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
+    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff || CHROMA444) {
         ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
         return;
     }
@@ -353,9 +353,10 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
     return v;
 }
 
-static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
+static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma444, int dir) {
     MpegEncContext * const s = &h->s;
     int edge;
+    int chroma_qp_avg[2];
     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
     const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type;
 
@@ -394,7 +395,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                         bS[2]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+2]);
                         bS[3]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+3]);
                     }else{
-                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8;
+                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 3*4;
                     int i;
                     for( i = 0; i < 4; i++ ) {
                         bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]);
@@ -407,10 +408,15 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
                 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
                 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h );
-                filter_mb_edgech( &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
-                                ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h);
-                filter_mb_edgech( &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
-                                ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h);
+                chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                if (chroma444) {
+                    filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
+                    filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
+                } else {
+                    filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
+                    filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
+                }
             }
         }else{
             DECLARE_ALIGNED(8, int16_t, bS)[4];
@@ -465,23 +471,29 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]);
                 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
                 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
+                chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
+                chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
                 if( dir == 0 ) {
                     filter_mb_edgev( &img_y[0], linesize, bS, qp, h );
                     {
-                        int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
-                        filter_mb_edgecv( &img_cb[0], uvlinesize, bS, qp, h);
-                        if(h->pps.chroma_qp_diff)
-                            qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
-                        filter_mb_edgecv( &img_cr[0], uvlinesize, bS, qp, h);
+                        if (chroma444) {
+                            filter_mb_edgev ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
+                            filter_mb_edgev ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
+                        } else {
+                            filter_mb_edgecv( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
+                            filter_mb_edgecv( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
+                        }
                     }
                 } else {
                     filter_mb_edgeh( &img_y[0], linesize, bS, qp, h );
                     {
-                        int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
-                        filter_mb_edgech( &img_cb[0], uvlinesize, bS, qp, h);
-                        if(h->pps.chroma_qp_diff)
-                            qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
-                        filter_mb_edgech( &img_cr[0], uvlinesize, bS, qp, h);
+                        if (chroma444) {
+                            filter_mb_edgeh ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
+                            filter_mb_edgeh ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
+                        } else {
+                            filter_mb_edgech( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
+                            filter_mb_edgech( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
+                        }
                     }
                 }
             }
@@ -545,13 +557,19 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
         //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
         if( dir == 0 ) {
             filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
-            if( (edge&1) == 0 ) {
+            if (chroma444) {
+                filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
+                filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
+            } else if( (edge&1) == 0 ) {
                 filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
                 filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
             }
         } else {
             filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
-            if( (edge&1) == 0 ) {
+            if (chroma444) {
+                filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
+                filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
+            } else if( (edge&1) == 0 ) {
                 filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
                 filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
             }
@@ -589,11 +607,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
         } else {
             static const uint8_t offset[2][2][8]={
                 {
-                    {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1},
-                    {7+8*2, 7+8*2, 7+8*2, 7+8*2, 7+8*3, 7+8*3, 7+8*3, 7+8*3},
+                    {3+4*0, 3+4*0, 3+4*0, 3+4*0, 3+4*1, 3+4*1, 3+4*1, 3+4*1},
+                    {3+4*2, 3+4*2, 3+4*2, 3+4*2, 3+4*3, 3+4*3, 3+4*3, 3+4*3},
                 },{
-                    {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3},
-                    {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3},
+                    {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
+                    {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
                 }
             };
             const uint8_t *off= offset[MB_FIELD][mb_y&1];
@@ -650,9 +668,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
 
 #if CONFIG_SMALL
     for( dir = 0; dir < 2; dir++ )
-        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
+        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, CHROMA444, dir);
 #else
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, CHROMA444, 0);
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, CHROMA444, 1);
 #endif
 }
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index a98f14aaf6..9c41e4ca73 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -269,7 +269,7 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_s
         fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
         fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
         fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
-        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
+        fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1]
     };
     if(get_bits1(&s->gb)){
         sps->scaling_matrix_present |= is_sps;
@@ -281,7 +281,15 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_s
         decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
         if(is_sps || pps->transform_8x8_mode){
             decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
-            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
+            if(h->sps.chroma_format_idc == 3){
+                decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[0],scaling_matrix8[0]);  // Intra, Cr
+                decode_scaling_list(h,scaling_matrix8[2],64,default_scaling8[0],scaling_matrix8[1]);  // Intra, Cb
+            }
+            decode_scaling_list(h,scaling_matrix8[3],64,default_scaling8[1],fallback[3]);  // Inter, Y
+            if(h->sps.chroma_format_idc == 3){
+                decode_scaling_list(h,scaling_matrix8[4],64,default_scaling8[1],scaling_matrix8[3]);  // Inter, Cr
+                decode_scaling_list(h,scaling_matrix8[5],64,default_scaling8[1],scaling_matrix8[4]);  // Inter, Cb
+            }
         }
     }
 }
@@ -395,7 +403,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
         if(sps->crop_left || sps->crop_top){
             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
         }
-        if(sps->crop_right >= 8 || sps->crop_bottom >= 8){
+        if(sps->crop_right >= (8<<CHROMA444) || sps->crop_bottom >= (8<<CHROMA444)){
             av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
         }
     }else{
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 864c118bb5..6972725781 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -66,10 +66,10 @@ typedef struct H264DSPContext{
     void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
     void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
 
-    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
-    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
-    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
-    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
+    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
+    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
+    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
     void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul);
     void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
 }H264DSPContext;
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index 39c9a1c9eb..e7f9af7fb0 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -30,15 +30,19 @@
 #ifndef AVCODEC_H264IDCT_INTERNAL_H
 #define AVCODEC_H264IDCT_INTERNAL_H
 //FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
-static const uint8_t scan8[16 + 2*4]={
- 4+1*8, 5+1*8, 4+2*8, 5+2*8,
- 6+1*8, 7+1*8, 6+2*8, 7+2*8,
- 4+3*8, 5+3*8, 4+4*8, 5+4*8,
- 6+3*8, 7+3*8, 6+4*8, 7+4*8,
- 1+1*8, 2+1*8,
- 1+2*8, 2+2*8,
- 1+4*8, 2+4*8,
- 1+5*8, 2+5*8,
+static const uint8_t scan8[16*3]={
+ 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
+ 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
+ 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
+ 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
+ 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
+ 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
+ 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
+ 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
+ 4+11*8, 5+11*8, 4+12*8, 5+12*8,
+ 6+11*8, 7+11*8, 6+12*8, 7+12*8,
+ 4+13*8, 5+13*8, 4+14*8, 5+14*8,
+ 6+13*8, 7+13*8, 6+14*8, 7+14*8
 };
 #endif
 
@@ -190,7 +194,7 @@ void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
     }
 }
 
-void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
     int i;
     for(i=0; i<16; i++){
         int nnz = nnzc[ scan8[i] ];
@@ -201,7 +205,7 @@ void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *b
     }
 }
 
-void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
     int i;
     for(i=0; i<16; i++){
         if(nnzc[ scan8[i] ])             FUNCC(idct_internal      )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1);
@@ -209,7 +213,7 @@ void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTEL
     }
 }
 
-void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
     int i;
     for(i=0; i<16; i+=4){
         int nnz = nnzc[ scan8[i] ];
@@ -220,13 +224,15 @@ void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *b
     }
 }
 
-void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
-    int i;
-    for(i=16; i<16+8; i++){
-        if(nnzc[ scan8[i] ])
-            FUNCC(ff_h264_idct_add   )(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
-        else if(((dctcoef*)block)[i*16])
-            FUNCC(ff_h264_idct_dc_add)(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
+void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
+    int i, j;
+    for(j=1; j<3; j++){
+        for(i=j*16; i<j*16+4; i++){
+            if(nnzc[ scan8[i] ])
+                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
+            else if(((dctcoef*)block)[i*16])
+                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
+        }
     }
 }
 /**
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 6a45da8761..4978d28b49 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1185,15 +1185,17 @@ void MPV_frame_end(MpegEncContext *s)
        && s->current_picture.reference
        && !s->intra_only
        && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
+            int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w;
+            int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h;
             s->dsp.draw_edges(s->current_picture.data[0], s->linesize  ,
-                              s->h_edge_pos   , s->v_edge_pos   ,
-                              EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
+                              s->h_edge_pos             , s->v_edge_pos,
+                              EDGE_WIDTH        , EDGE_WIDTH        , EDGE_TOP | EDGE_BOTTOM);
             s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize,
-                              s->h_edge_pos>>1, s->v_edge_pos>>1,
-                              EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                              s->h_edge_pos>>hshift, s->v_edge_pos>>vshift,
+                              EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, EDGE_TOP | EDGE_BOTTOM);
             s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize,
-                              s->h_edge_pos>>1, s->v_edge_pos>>1,
-                              EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                              s->h_edge_pos>>hshift, s->v_edge_pos>>vshift,
+                              EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, EDGE_TOP | EDGE_BOTTOM);
     }
 
     emms_c();
@@ -2284,14 +2286,19 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
        && !s->intra_only
        && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
         int sides = 0, edge_h;
+        int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w;
+        int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h;
         if (y==0) sides |= EDGE_TOP;
         if (y + h >= s->v_edge_pos) sides |= EDGE_BOTTOM;
 
         edge_h= FFMIN(h, s->v_edge_pos - y);
 
-        s->dsp.draw_edges(s->current_picture_ptr->data[0] +  y    *s->linesize  , s->linesize  , s->h_edge_pos   , edge_h   , EDGE_WIDTH  , sides);
-        s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides);
-        s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[0] +  y         *s->linesize  , s->linesize,
+                          s->h_edge_pos        , edge_h        , EDGE_WIDTH        , EDGE_WIDTH        , sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>vshift)*s->uvlinesize, s->uvlinesize,
+                          s->h_edge_pos>>hshift, edge_h>>vshift, EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>vshift)*s->uvlinesize, s->uvlinesize,
+                          s->h_edge_pos>>hshift, edge_h>>vshift, EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, sides);
     }
 
     h= FFMIN(h, s->avctx->height - y);
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 6db0b290ba..28f04f119b 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -1978,13 +1978,13 @@ static int frame_start(SnowContext *s){
     if(s->current_picture.data[0]){
         s->dsp.draw_edges(s->current_picture.data[0],
                           s->current_picture.linesize[0], w   , h   ,
-                          EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
+                          EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
         s->dsp.draw_edges(s->current_picture.data[1],
                           s->current_picture.linesize[1], w>>1, h>>1,
-                          EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                          EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
         s->dsp.draw_edges(s->current_picture.data[2],
                           s->current_picture.linesize[2], w>>1, h>>1,
-                          EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                          EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
     }
 
     release_buffer(s->avctx);
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 1cc6991666..214c6a3945 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -784,7 +784,7 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
 
 /* draw the edges of width 'w' of an image of size width, height
    this mmx version can only handle w==8 || w==16 */
-static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int sides)
+static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides)
 {
     uint8_t *ptr, *last_line;
     int i;
@@ -839,7 +839,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w,
 
     /* top and bottom (and hopefully also the corners) */
     if (sides&EDGE_TOP) {
-        for(i = 0; i < w; i += 4) {
+        for(i = 0; i < h; i += 4) {
             ptr= buf - (i + 1) * wrap - w;
             __asm__ volatile(
                     "1:                             \n\t"
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index c850dc2ef3..b5f77c90d5 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -36,7 +36,7 @@
 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
 static int decode_significance_x86(CABACContext *c, int max_coeff,
                                    uint8_t *significant_coeff_ctx_base,
-                                   int *index){
+                                   int *index, int last_off){
     void *end= significant_coeff_ctx_base + max_coeff - 1;
     int minusstart= -(int)significant_coeff_ctx_base;
     int minusindex= 4-(int)index;
@@ -52,10 +52,12 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
         "test $1, %%edx                         \n\t"
         " jz 3f                                 \n\t"
+        "add  %7, %1                            \n\t"
 
-        BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx",
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx",
                              "%%bx", "%%esi", "%%eax", "%%al")
 
+        "sub  %7, %1                            \n\t"
         "mov  %2, %%"REG_a"                     \n\t"
         "movl %4, %%ecx                         \n\t"
         "add  %1, %%"REG_c"                     \n\t"
@@ -82,7 +84,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
         "movl %%esi, "RANGE    "(%3)            \n\t"
         "movl %%ebx, "LOW      "(%3)            \n\t"
         :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)
-        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)
+        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off)
         : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
     );
     return coeff_count;
@@ -90,7 +92,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
 static int decode_significance_8x8_x86(CABACContext *c,
                                        uint8_t *significant_coeff_ctx_base,
-                                       int *index, const uint8_t *sig_off){
+                                       int *index, int last_off, const uint8_t *sig_off){
     int minusindex= 4-(int)index;
     int coeff_count;
     x86_reg last=0;
@@ -114,8 +116,9 @@ static int decode_significance_8x8_x86(CABACContext *c,
 
         "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
         "add %5, %%"REG_D"                      \n\t"
+        "add %7, %%"REG_D"                      \n\t"
 
-        BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx",
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx",
                              "%%bx", "%%esi", "%%eax", "%%al")
 
         "mov %2, %%"REG_a"                      \n\t"
@@ -142,7 +145,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "movl %%esi, "RANGE    "(%3)            \n\t"
         "movl %%ebx, "LOW      "(%3)            \n\t"
         :"=&a"(coeff_count),"+m"(last), "+m"(index)
-        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)
+        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off)
         : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"
     );
     return coeff_count;
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index f90f41c4bc..4788da98e0 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -32,14 +32,18 @@
 SECTION_RODATA
 
 ; FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
-scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
-           db 6+1*8, 7+1*8, 6+2*8, 7+2*8
-           db 4+3*8, 5+3*8, 4+4*8, 5+4*8
-           db 6+3*8, 7+3*8, 6+4*8, 7+4*8
-           db 1+1*8, 2+1*8
-           db 1+2*8, 2+2*8
-           db 1+4*8, 2+4*8
-           db 1+5*8, 2+5*8
+scan8_mem: db  4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
+           db  6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
+           db  4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
+           db  6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
+           db  4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
+           db  6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
+           db  4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
+           db  6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
+           db  4+11*8, 5+11*8, 4+12*8, 5+12*8
+           db  6+11*8, 7+11*8, 6+12*8, 7+12*8
+           db  4+13*8, 5+13*8, 4+14*8, 5+14*8
+           db  6+13*8, 7+13*8, 6+14*8, 7+14*8
 %ifdef PIC
 %define scan8 r11
 %else
@@ -617,6 +621,8 @@ cglobal h264_idct_add8_8_mmx, 5, 7, 0
     mov         r10, r0
 %endif
     call         h264_idct_add8_mmx_plane
+    mov          r5, 32
+    add          r2, 384
 %ifdef ARCH_X86_64
     add         r10, gprsize
 %else
@@ -678,6 +684,8 @@ cglobal h264_idct_add8_8_mmx2, 5, 7, 0
     lea         r11, [scan8_mem]
 %endif
     call h264_idct_add8_mmx2_plane
+    mov          r5, 32
+    add          r2, 384
 %ifdef ARCH_X86_64
     add         r10, gprsize
 %else
@@ -810,12 +818,12 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
     test        r0, r0
     jz .try%1dc
 %ifdef ARCH_X86_64
-    mov        r0d, dword [r1+%1*8+64]
+    mov        r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
     add         r0, [r10]
 %else
     mov         r0, r0m
     mov         r0, [r0]
-    add         r0, dword [r1+%1*8+64]
+    add         r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
 %endif
     call        x264_add8x4_idct_sse2
     jmp .cycle%1end
@@ -824,16 +832,18 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
     or         r0w, word [r2+32]
     jz .cycle%1end
 %ifdef ARCH_X86_64
-    mov        r0d, dword [r1+%1*8+64]
+    mov        r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
     add         r0, [r10]
 %else
     mov         r0, r0m
     mov         r0, [r0]
-    add         r0, dword [r1+%1*8+64]
+    add         r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
 %endif
     call        h264_idct_dc_add8_mmx2
 .cycle%1end
-%if %1 < 3
+%if %1 == 1
+    add         r2, 384+64
+%elif %1 < 3
     add         r2, 64
 %endif
 %endmacro
@@ -845,15 +855,15 @@ cglobal h264_idct_add8_8_sse2, 5, 7, 8
 %ifdef ARCH_X86_64
     mov         r10, r0
 %endif
-    add8_sse2_cycle 0, 0x09
-    add8_sse2_cycle 1, 0x11
+    add8_sse2_cycle 0, 0x34
+    add8_sse2_cycle 1, 0x3c
 %ifdef ARCH_X86_64
     add         r10, gprsize
 %else
     add        r0mp, gprsize
 %endif
-    add8_sse2_cycle 2, 0x21
-    add8_sse2_cycle 3, 0x29
+    add8_sse2_cycle 2, 0x5c
+    add8_sse2_cycle 3, 0x64
     RET
 
 ;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul)
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 3f7cf4cefc..54636a95d0 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -29,14 +29,18 @@ SECTION_RODATA
 
 pw_pixel_max: times 8 dw ((1 << 10)-1)
 pd_32:        times 4 dd 32
-scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
-           db 6+1*8, 7+1*8, 6+2*8, 7+2*8
-           db 4+3*8, 5+3*8, 4+4*8, 5+4*8
-           db 6+3*8, 7+3*8, 6+4*8, 7+4*8
-           db 1+1*8, 2+1*8
-           db 1+2*8, 2+2*8
-           db 1+4*8, 2+4*8
-           db 1+5*8, 2+5*8
+scan8_mem: db  4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
+           db  6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
+           db  4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
+           db  6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
+           db  4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
+           db  6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
+           db  4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
+           db  6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
+           db  4+11*8, 5+11*8, 4+12*8, 5+12*8
+           db  6+11*8, 7+11*8, 6+12*8, 7+12*8
+           db  4+13*8, 5+13*8, 4+14*8, 5+14*8
+           db  6+13*8, 7+13*8, 6+14*8, 7+14*8
 
 %ifdef PIC
 %define scan8 r11
@@ -306,7 +310,7 @@ INIT_AVX
 IDCT_ADD16INTRA_10 avx
 %endif
 
-%assign last_block 24
+%assign last_block 36
 ;-----------------------------------------------------------------------------
 ; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
@@ -317,21 +321,22 @@ cglobal h264_idct_add8_10_%1,5,7
 %endif
     add      r2, 1024
     mov      r0, [r0]
-    ADD16_OP_INTRA %1, 16, 1+1*8
-    ADD16_OP_INTRA %1, 18, 1+2*8
+    ADD16_OP_INTRA %1, 16, 4+ 6*8
+    ADD16_OP_INTRA %1, 18, 4+ 7*8
+    add      r2, 1024-128*2
 %ifdef ARCH_X86_64
     mov      r0, [r10+gprsize]
 %else
     mov      r0, r0m
     mov      r0, [r0+gprsize]
 %endif
-    ADD16_OP_INTRA %1, 20, 1+4*8
-    ADD16_OP_INTRA %1, 22, 1+5*8
+    ADD16_OP_INTRA %1, 32, 4+11*8
+    ADD16_OP_INTRA %1, 34, 4+12*8
     REP_RET
     AC %1, 16
     AC %1, 18
-    AC %1, 20
-    AC %1, 22
+    AC %1, 32
+    AC %1, 34
 
 %endmacro ; IDCT_ADD8
 

From c177cfb4fb2430f4d43d27ba3c3476176f17d006 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Thu, 9 Jun 2011 16:17:41 -0700
Subject: [PATCH 788/830] H.264: fix CODEC_FLAG_GRAY

It was broken in 4:4:4, and still did chroma deblocking for no reason in 4:2:0.
---
 libavcodec/h264.c            | 51 ++++++++++---------
 libavcodec/h264_loopfilter.c | 98 ++++++++++++++++++++----------------
 2 files changed, 84 insertions(+), 65 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 86ea218807..78ca4141a4 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -484,6 +484,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
     }
 
+    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
+
     if(chroma444){
         src_cb = pic->data[1] + offset;
         if(emu){
@@ -509,8 +511,6 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
         return;
     }
 
-    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
-
     if(MB_FIELD){
         // chroma offset when predicting from a field of opposite parity
         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
@@ -1847,24 +1847,28 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
                 for (j = 0; j < 16; j++)
                     tmp_y[j] = get_bits(&gb, bit_depth);
             }
-            for (i = 0; i < 8; i++) {
-                uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
-                for (j = 0; j < 8; j++)
-                    tmp_cb[j] = get_bits(&gb, bit_depth);
-            }
-            for (i = 0; i < 8; i++) {
-                uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
-                for (j = 0; j < 8; j++)
-                    tmp_cr[j] = get_bits(&gb, bit_depth);
+            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+                for (i = 0; i < 8; i++) {
+                    uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
+                    for (j = 0; j < 8; j++)
+                        tmp_cb[j] = get_bits(&gb, bit_depth);
+                }
+                for (i = 0; i < 8; i++) {
+                    uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
+                    for (j = 0; j < 8; j++)
+                        tmp_cr[j] = get_bits(&gb, bit_depth);
+                }
             }
         } else {
-        for (i=0; i<16; i++) {
-            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
-        }
-        for (i=0; i<8; i++) {
-            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
-            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
-        }
+            for (i=0; i<16; i++) {
+                memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
+            }
+            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+                for (i=0; i<8; i++) {
+                    memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
+                    memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
+                }
+            }
         }
     } else {
         if(IS_INTRA(mb_type)){
@@ -1954,8 +1958,9 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
     int i, j, p;
     int *block_offset = &h->block_offset[0];
     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
+    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;
 
-    for (p = 0; p < 3; p++)
+    for (p = 0; p < plane_count; p++)
     {
         dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
         s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
@@ -1996,7 +2001,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
             GetBitContext gb;
             init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);
 
-            for (p = 0; p < 3; p++) {
+            for (p = 0; p < plane_count; p++) {
                 for (i = 0; i < 16; i++) {
                     uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                     for (j = 0; j < 16; j++)
@@ -2004,7 +2009,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
                 }
             }
         } else {
-            for (p = 0; p < 3; p++) {
+            for (p = 0; p < plane_count; p++) {
                 for (i = 0; i < 16; i++) {
                     memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                 }
@@ -2015,7 +2020,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
             if(h->deblocking_filter)
                 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);
 
-            for (p = 0; p < 3; p++)
+            for (p = 0; p < plane_count; p++)
                 hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
 
             if(h->deblocking_filter)
@@ -2035,7 +2040,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
                             h->h264dsp.biweight_h264_pixels_tab, 1);
         }
 
-        for (p = 0; p < 3; p++)
+        for (p = 0; p < plane_count; p++)
             hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
     }
     if(h->cbp || IS_INTRA(mb_type))
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index d4ecefcf08..1ae534ec96 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -217,6 +217,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
     int mb_xy;
     int mb_type, left_type;
     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
+    int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
 
     mb_xy = h->mb_xy;
 
@@ -262,16 +263,18 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
             filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
         }
-        if(left_type){
-            filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
-            filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
+        if(chroma){
+            if(left_type){
+                filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
+                filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
+            }
+            filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
+            filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
+            filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+            filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
+            filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+            filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
         }
-        filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
-        filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
-        filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
-        filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
-        filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
-        filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
         return;
     } else {
         LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]);
@@ -298,7 +301,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
 #define FILTER(hv,dir,edge)\
         if(AV_RN64A(bS[dir][edge])) {                                   \
             filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\
-            if(!(edge&1)) {\
+            if(chroma && !(edge&1)) {\
                 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
                 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
             }\
@@ -353,7 +356,7 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
     return v;
 }
 
-static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma444, int dir) {
+static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma, int chroma444, int dir) {
     MpegEncContext * const s = &h->s;
     int edge;
     int chroma_qp_avg[2];
@@ -410,12 +413,14 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h );
                 chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
                 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
-                if (chroma444) {
-                    filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
-                    filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
-                } else {
-                    filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
-                    filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
+                if (chroma) {
+                    if (chroma444) {
+                        filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
+                        filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
+                    } else {
+                        filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
+                        filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
+                    }
                 }
             }
         }else{
@@ -475,7 +480,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
                 if( dir == 0 ) {
                     filter_mb_edgev( &img_y[0], linesize, bS, qp, h );
-                    {
+                    if (chroma) {
                         if (chroma444) {
                             filter_mb_edgev ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
                             filter_mb_edgev ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
@@ -486,7 +491,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                     }
                 } else {
                     filter_mb_edgeh( &img_y[0], linesize, bS, qp, h );
-                    {
+                    if (chroma) {
                         if (chroma444) {
                             filter_mb_edgeh ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
                             filter_mb_edgeh ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
@@ -557,21 +562,25 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
         //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
         if( dir == 0 ) {
             filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
-            if (chroma444) {
-                filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
-                filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
-            } else if( (edge&1) == 0 ) {
-                filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
-                filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
+            if (chroma) {
+                if (chroma444) {
+                    filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
+                    filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
+                } else if( (edge&1) == 0 ) {
+                    filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
+                    filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
+                }
             }
         } else {
             filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
-            if (chroma444) {
-                filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
-                filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
-            } else if( (edge&1) == 0 ) {
-                filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
-                filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
+            if (chroma) {
+                if (chroma444) {
+                    filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
+                    filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
+                } else if( (edge&1) == 0 ) {
+                    filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
+                    filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
+                }
             }
         }
     }
@@ -584,6 +593,7 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
     int first_vertical_edge_done = 0;
     av_unused int dir;
+    int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
 
     if (FRAME_MBAFF
             // and current and left pair do not have the same interlaced type
@@ -652,25 +662,29 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
         if(MB_FIELD){
             filter_mb_mbaff_edgev ( h, img_y                ,   linesize, bS  , 1, qp [0] );
             filter_mb_mbaff_edgev ( h, img_y  + 8*  linesize,   linesize, bS+4, 1, qp [1] );
-            filter_mb_mbaff_edgecv( h, img_cb,                uvlinesize, bS  , 1, bqp[0] );
-            filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
-            filter_mb_mbaff_edgecv( h, img_cr,                uvlinesize, bS  , 1, rqp[0] );
-            filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
+            if (chroma){
+                filter_mb_mbaff_edgecv( h, img_cb,                uvlinesize, bS  , 1, bqp[0] );
+                filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
+                filter_mb_mbaff_edgecv( h, img_cr,                uvlinesize, bS  , 1, rqp[0] );
+                filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
+            }
         }else{
             filter_mb_mbaff_edgev ( h, img_y              , 2*  linesize, bS  , 2, qp [0] );
             filter_mb_mbaff_edgev ( h, img_y  +   linesize, 2*  linesize, bS+1, 2, qp [1] );
-            filter_mb_mbaff_edgecv( h, img_cb,              2*uvlinesize, bS  , 2, bqp[0] );
-            filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
-            filter_mb_mbaff_edgecv( h, img_cr,              2*uvlinesize, bS  , 2, rqp[0] );
-            filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
+            if (chroma){
+                filter_mb_mbaff_edgecv( h, img_cb,              2*uvlinesize, bS  , 2, bqp[0] );
+                filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
+                filter_mb_mbaff_edgecv( h, img_cr,              2*uvlinesize, bS  , 2, rqp[0] );
+                filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
+            }
         }
     }
 
 #if CONFIG_SMALL
     for( dir = 0; dir < 2; dir++ )
-        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, CHROMA444, dir);
+        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, chroma, CHROMA444, dir);
 #else
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, CHROMA444, 0);
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, CHROMA444, 1);
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, chroma, CHROMA444, 0);
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, chroma, CHROMA444, 1);
 #endif
 }

From 295f0a2503550088a5ffddc5754b9fba2fa6ee60 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Mon, 13 Jun 2011 10:21:46 -0700
Subject: [PATCH 789/830] Fix SVQ3 after adding 4:4:4 H.264 support

---
 libavcodec/svq3.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 7cde5e5552..23ab209312 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -633,8 +633,9 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
         memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy], DC_PRED, 8);
     }
     if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
-        memset(h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t));
-        s->dsp.clear_blocks(h->mb);
+        memset(h->non_zero_count_cache + 8, 0, 14*8*sizeof(uint8_t));
+        s->dsp.clear_blocks(h->mb+  0);
+        s->dsp.clear_blocks(h->mb+384);
     }
 
     if (!IS_INTRA16x16(mb_type) && (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
@@ -654,8 +655,8 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
         }
     }
     if (IS_INTRA16x16(mb_type)) {
-        AV_ZERO128(h->mb_luma_dc+0);
-        AV_ZERO128(h->mb_luma_dc+8);
+        AV_ZERO128(h->mb_luma_dc[0]+0);
+        AV_ZERO128(h->mb_luma_dc[0]+8);
         if (svq3_decode_block(&s->gb, h->mb_luma_dc, 0, 1)){
             av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding intra luma dc\n");
             return -1;
@@ -681,20 +682,23 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
         }
 
         if ((cbp & 0x30)) {
-            for (i = 0; i < 2; ++i) {
-              if (svq3_decode_block(&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)){
+            for (i = 1; i < 3; ++i) {
+              if (svq3_decode_block(&s->gb, &h->mb[16*16*i], 0, 3)){
                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma dc block\n");
                 return -1;
               }
             }
 
             if ((cbp & 0x20)) {
-                for (i = 0; i < 8; i++) {
-                    h->non_zero_count_cache[ scan8[16+i] ] = 1;
+                for (i = 1; i < 3; i++) {
+                    for (j = 0; j < 4; j++) {
+                        k = 16*i + j;
+                        h->non_zero_count_cache[ scan8[k] ] = 1;
 
-                    if (svq3_decode_block(&s->gb, &h->mb[16*(16 + i)], 1, 1)){
-                        av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
-                        return -1;
+                        if (svq3_decode_block(&s->gb, &h->mb[16*k], 1, 1)){
+                            av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
+                            return -1;
+                        }
                     }
                 }
             }

From 504811baeacf8bac400962e84fca678b79068ceb Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Mon, 13 Jun 2011 13:38:46 -0700
Subject: [PATCH 790/830] Roll back 4:4:4 H.264 for now; needs some ARM/PPC asm
 modifications.

---
 libavcodec/dsputil.h               |   2 +-
 libavcodec/dsputil_template.c      |   6 +-
 libavcodec/h264.c                  | 797 ++++++++++------------------
 libavcodec/h264.h                  | 181 +++----
 libavcodec/h264_cabac.c            | 819 ++++-------------------------
 libavcodec/h264_cavlc.c            | 198 +++----
 libavcodec/h264_loopfilter.c       | 132 ++---
 libavcodec/h264_ps.c               |  14 +-
 libavcodec/h264dsp.h               |   8 +-
 libavcodec/h264idct_template.c     |  44 +-
 libavcodec/mpegvideo.c             |  25 +-
 libavcodec/snow.c                  |   6 +-
 libavcodec/svq3.c                  |  26 +-
 libavcodec/x86/dsputil_mmx.c       |   4 +-
 libavcodec/x86/h264_i386.h         |  15 +-
 libavcodec/x86/h264_idct.asm       |  44 +-
 libavcodec/x86/h264_idct_10bit.asm |  35 +-
 17 files changed, 687 insertions(+), 1669 deletions(-)

diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 7a28b06fd5..cfc574aebb 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -507,7 +507,7 @@ typedef struct DSPContext {
 #define BASIS_SHIFT 16
 #define RECON_SHIFT 6
 
-    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides);
+    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int sides);
 #define EDGE_WIDTH 16
 #define EDGE_TOP    1
 #define EDGE_BOTTOM 2
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index b85931856a..8ca6d3e414 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -79,7 +79,7 @@ static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstS
 
 /* draw the edges of width 'w' of an image of size width, height */
 //FIXME check that this is ok for mpeg4 interlaced
-static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int h, int sides)
+static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int sides)
 {
     pixel *buf = (pixel*)_buf;
     int wrap = _wrap / sizeof(pixel);
@@ -106,10 +106,10 @@ static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, i
     buf -= w;
     last_line = buf + (height - 1) * wrap;
     if (sides & EDGE_TOP)
-        for(i = 0; i < h; i++)
+        for(i = 0; i < w; i++)
             memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
     if (sides & EDGE_BOTTOM)
-        for (i = 0; i < h; i++)
+        for (i = 0; i < w; i++)
             memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
 }
 
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 78ca4141a4..276d6e6d6c 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -451,13 +451,12 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int src_x_offset, int src_y_offset,
                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
-                           int pixel_shift, int chroma444){
+                           int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
     const int luma_xy= (mx&3) + ((my&3)<<2);
-    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
-    uint8_t * src_y = pic->data[0] + offset;
+    uint8_t * src_y = pic->data[0] + ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
     uint8_t * src_cb, * src_cr;
     int extra_width= h->emu_edge_width;
     int extra_height= h->emu_edge_height;
@@ -486,31 +485,6 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
 
     if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
 
-    if(chroma444){
-        src_cb = pic->data[1] + offset;
-        if(emu){
-            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
-                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
-            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
-        }
-        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
-        if(!square){
-            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
-        }
-
-        src_cr = pic->data[2] + offset;
-        if(emu){
-            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
-                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
-            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
-        }
-        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
-        if(!square){
-            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
-        }
-        return;
-    }
-
     if(MB_FIELD){
         // chroma offset when predicting from a field of opposite parity
         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
@@ -537,19 +511,14 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
                            int x_offset, int y_offset,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
-                           int list0, int list1, int pixel_shift, int chroma444){
+                           int list0, int list1, int pixel_shift){
     MpegEncContext * const s = &h->s;
     qpel_mc_func *qpix_op=  qpix_put;
     h264_chroma_mc_func chroma_op= chroma_put;
 
-    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
-    if(chroma444){
-        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
-        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
-    }else{
-        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
-        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
-    }
+    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->  mb_linesize;
+    dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -557,7 +526,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op, pixel_shift, chroma444);
+                           qpix_op, chroma_op, pixel_shift);
 
         qpix_op=  qpix_avg;
         chroma_op= chroma_avg;
@@ -567,7 +536,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op, pixel_shift, chroma444);
+                           qpix_op, chroma_op, pixel_shift);
     }
 }
 
@@ -577,19 +546,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
-                           int list0, int list1, int pixel_shift, int chroma444){
+                           int list0, int list1, int pixel_shift){
     MpegEncContext * const s = &h->s;
 
-    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
-    if(chroma444){
-        chroma_weight_avg = luma_weight_avg;
-        chroma_weight_op = luma_weight_op;
-        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
-        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
-    }else{
-        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
-        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
-    }
+    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->  mb_linesize;
+    dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -597,17 +559,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         /* don't optimize for luma-only case, since B-frames usually
          * use implicit weights => chroma too. */
         uint8_t *tmp_cb = s->obmc_scratchpad;
-        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
-        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
+        uint8_t *tmp_cr = s->obmc_scratchpad + (8 << pixel_shift);
+        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
         int refn0 = h->ref_cache[0][ scan8[n] ];
         int refn1 = h->ref_cache[1][ scan8[n] ];
 
         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                     dest_y, dest_cb, dest_cr,
-                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
+                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                     tmp_y, tmp_cb, tmp_cr,
-                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
+                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
 
         if(h->use_weight == 2){
             int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
@@ -632,7 +594,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         Picture *ref= &h->ref_list[list][refn];
         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                    qpix_put, chroma_put, pixel_shift, chroma444);
+                    qpix_put, chroma_put, pixel_shift);
 
         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                        h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
@@ -651,21 +613,21 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
-                           int list0, int list1, int pixel_shift, int chroma444){
+                           int list0, int list1, int pixel_shift){
     if((h->use_weight==2 && list0 && list1
         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
        || h->use_weight==1)
         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                          x_offset, y_offset, qpix_put, chroma_put,
                          weight_op[0], weight_op[3], weight_avg[0],
-                         weight_avg[3], list0, list1, pixel_shift, chroma444);
+                         weight_avg[3], list0, list1, pixel_shift);
     else
         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
-                    chroma_avg, list0, list1, pixel_shift, chroma444);
+                    chroma_avg, list0, list1, pixel_shift);
 }
 
-static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
+static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){
     /* fetch pixels for estimated mv 4 macroblocks ahead
      * optimized for 64byte cache lines */
     MpegEncContext * const s = &h->s;
@@ -676,13 +638,8 @@ static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, in
         uint8_t **src= h->ref_list[list][refn].data;
         int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
         s->dsp.prefetch(src[0]+off, s->linesize, 4);
-        if(chroma444){
-            s->dsp.prefetch(src[1]+off, s->linesize, 4);
-            s->dsp.prefetch(src[2]+off, s->linesize, 4);
-        }else{
-            off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
-            s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
-        }
+        off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
+        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
     }
 }
 
@@ -690,7 +647,7 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                       h264_weight_func *weight_op, h264_biweight_func *weight_avg,
-                      int pixel_shift, int chroma444){
+                      int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
@@ -699,36 +656,36 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
 
     if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
         await_references(h);
-    prefetch_motion(h, 0, pixel_shift, chroma444);
+    prefetch_motion(h, 0, pixel_shift);
 
     if(IS_16X16(mb_type)){
         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                 weight_op, weight_avg,
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
-                pixel_shift, chroma444);
+                pixel_shift);
     }else if(IS_16X8(mb_type)){
         mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
-                pixel_shift, chroma444);
+                pixel_shift);
         mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
-                pixel_shift, chroma444);
+                pixel_shift);
     }else if(IS_8X16(mb_type)){
         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
-                pixel_shift, chroma444);
+                pixel_shift);
         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
-                pixel_shift, chroma444);
+                pixel_shift);
     }else{
         int i;
 
@@ -745,29 +702,29 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                     &weight_op[3], &weight_avg[3],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift, chroma444);
+                    pixel_shift);
             }else if(IS_SUB_8X4(sub_mb_type)){
                 mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift, chroma444);
+                    pixel_shift);
                 mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift, chroma444);
+                    pixel_shift);
             }else if(IS_SUB_4X8(sub_mb_type)){
                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift, chroma444);
+                    pixel_shift);
                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift, chroma444);
+                    pixel_shift);
             }else{
                 int j;
                 assert(IS_SUB_4X4(sub_mb_type));
@@ -778,13 +735,13 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                         &weight_op[6], &weight_avg[6],
                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                        pixel_shift, chroma444);
+                        pixel_shift);
                 }
             }
         }
     }
 
-    prefetch_motion(h, 1, pixel_shift, chroma444);
+    prefetch_motion(h, 1, pixel_shift);
 }
 
 #define hl_motion_fn(sh, bits) \
@@ -796,11 +753,10 @@ static av_always_inline void hl_motion_ ## bits(H264Context *h, \
                                        qpel_mc_func (*qpix_avg)[16], \
                                        h264_chroma_mc_func (*chroma_avg), \
                                        h264_weight_func *weight_op, \
-                                       h264_biweight_func *weight_avg, \
-                                       int chroma444) \
+                                       h264_biweight_func *weight_avg) \
 { \
     hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
-              qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
+              qpix_avg, chroma_avg, weight_op, weight_avg, sh); \
 }
 hl_motion_fn(0, 8);
 hl_motion_fn(1, 16);
@@ -840,19 +796,16 @@ static void free_tables(H264Context *h, int free_rbsp){
 }
 
 static void init_dequant8_coeff_table(H264Context *h){
-    int i,j,q,x;
+    int i,q,x;
     const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
+    h->dequant8_coeff[0] = h->dequant8_buffer[0];
+    h->dequant8_coeff[1] = h->dequant8_buffer[1];
 
-    for(i=0; i<6; i++ ){
-        h->dequant8_coeff[i] = h->dequant8_buffer[i];
-        for(j=0; j<i; j++){
-            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
-                h->dequant8_coeff[i] = h->dequant8_buffer[j];
-                break;
-            }
+    for(i=0; i<2; i++ ){
+        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
+            h->dequant8_coeff[1] = h->dequant8_buffer[0];
+            break;
         }
-        if(j<i)
-            continue;
 
         for(q=0; q<max_qp+1; q++){
             int shift = div6[q];
@@ -900,7 +853,7 @@ static void init_dequant_tables(H264Context *h){
             for(x=0; x<16; x++)
                 h->dequant4_coeff[i][0][x] = 1<<6;
         if(h->pps.transform_8x8_mode)
-            for(i=0; i<6; i++)
+            for(i=0; i<2; i++)
                 for(x=0; x<64; x++)
                     h->dequant8_coeff[i][0][x] = 1<<6;
     }
@@ -915,7 +868,7 @@ int ff_h264_alloc_tables(H264Context *h){
 
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)
 
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 32 * sizeof(uint8_t), fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
 
@@ -977,8 +930,8 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
  * Allocate buffers which are not shared amongst multiple threads.
  */
 static int context_init(H264Context *h){
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
 
     h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
     h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
@@ -1177,10 +1130,9 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex
 
         // frame_start may not be called for the next thread (if it's decoding a bottom field)
         // so this has to be allocated here
-        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);
+        h->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
 
         s->dsp.clear_blocks(h->mb);
-        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
     }
 
     //extradata/NAL handling
@@ -1199,7 +1151,7 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex
     for(i=0; i<6; i++)
         h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);
 
-    for(i=0; i<6; i++)
+    for(i=0; i<2; i++)
         h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);
 
     h->dequant_coeff_pps = h1->dequant_coeff_pps;
@@ -1254,20 +1206,20 @@ int ff_h264_frame_start(H264Context *h){
 
     for(i=0; i<16; i++){
         h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
-        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
     }
-    for(i=0; i<16; i++){
+    for(i=0; i<4; i++){
         h->block_offset[16+i]=
-        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
-        h->block_offset[48+16+i]=
-        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+16+i]=
+        h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
     }
 
     /* can't be in alloc_tables because linesize isn't known there.
      * FIXME: redo bipred weight to not require extra buffer? */
     for(i = 0; i < thread_count; i++)
         if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
-            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);
+            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
 
     /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
     memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
@@ -1452,7 +1404,7 @@ static void decode_postinit(H264Context *h, int setup_finished){
         ff_thread_finish_setup(s->avctx);
 }
 
-static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
+static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
     MpegEncContext * const s = &h->s;
     uint8_t *top_border;
     int top_idx = 1;
@@ -1470,24 +1422,12 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
                 if (pixel_shift)
                     AV_COPY128(top_border+16, src_y+15*linesize+16);
                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-                    if(chroma444){
-                        if (pixel_shift){
-                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
-                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
-                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
-                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
-                        } else {
-                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
-                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
-                        }
+                    if (pixel_shift) {
+                        AV_COPY128(top_border+32, src_cb+7*uvlinesize);
+                        AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                     } else {
-                        if (pixel_shift) {
-                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
-                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
-                        } else {
-                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
-                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
-                        }
+                    AV_COPY64(top_border+16, src_cb+7*uvlinesize);
+                    AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                     }
                 }
             }
@@ -1505,24 +1445,12 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
         AV_COPY128(top_border+16, src_y+16*linesize+16);
 
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-        if(chroma444){
-            if (pixel_shift){
-                AV_COPY128(top_border+32, src_cb + 16*linesize);
-                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
-                AV_COPY128(top_border+64, src_cr + 16*linesize);
-                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
-            } else {
-                AV_COPY128(top_border+16, src_cb + 16*linesize);
-                AV_COPY128(top_border+32, src_cr + 16*linesize);
-            }
+        if (pixel_shift) {
+            AV_COPY128(top_border+32, src_cb+8*uvlinesize);
+            AV_COPY128(top_border+48, src_cr+8*uvlinesize);
         } else {
-            if (pixel_shift) {
-                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
-                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
-            } else {
-                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
-                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
-            }
+        AV_COPY64(top_border+16, src_cb+8*uvlinesize);
+        AV_COPY64(top_border+24, src_cr+8*uvlinesize);
         }
     }
 }
@@ -1530,8 +1458,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                   uint8_t *src_cb, uint8_t *src_cr,
                                   int linesize, int uvlinesize,
-                                  int xchg, int chroma444,
-                                  int simple, int pixel_shift){
+                                  int xchg, int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
     int deblock_topleft;
     int deblock_top;
@@ -1586,28 +1513,13 @@ else      AV_COPY64(b,a);
         }
     }
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-        if(chroma444){
+        if(deblock_top){
             if(deblock_topleft){
-                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
-                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
-            }
-            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
-            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
-            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
-            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
-            if(s->mb_x+1 < s->mb_width){
-                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
-                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
-            }
-        } else {
-            if(deblock_top){
-                if(deblock_topleft){
-                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
-                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
-                }
-                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
-                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
+                XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
+                XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
             }
+            XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
+            XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
         }
     }
 }
@@ -1626,159 +1538,6 @@ static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int in
         AV_WN16A(mb + index, value);
 }
 
-static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
-                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
-{
-    MpegEncContext * const s = &h->s;
-    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
-    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
-    int i;
-    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
-    block_offset += 16*p;
-    if(IS_INTRA4x4(mb_type)){
-        if(simple || !s->encoding){
-            if(IS_8x8DCT(mb_type)){
-                if(transform_bypass){
-                    idct_dc_add =
-                    idct_add    = s->dsp.add_pixels8;
-                }else{
-                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
-                    idct_add    = h->h264dsp.h264_idct8_add;
-                }
-                for(i=0; i<16; i+=4){
-                    uint8_t * const ptr= dest_y + block_offset[i];
-                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
-                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
-                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
-                    }else{
-                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
-                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
-                                                    (h->topright_samples_available<<i)&0x4000, linesize);
-                        if(nnz){
-                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
-                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
-                            else
-                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
-                        }
-                    }
-                }
-            }else{
-                if(transform_bypass){
-                    idct_dc_add =
-                    idct_add    = s->dsp.add_pixels4;
-                }else{
-                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
-                    idct_add    = h->h264dsp.h264_idct_add;
-                }
-                for(i=0; i<16; i++){
-                    uint8_t * const ptr= dest_y + block_offset[i];
-                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
-
-                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
-                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
-                    }else{
-                        uint8_t *topright;
-                        int nnz, tr;
-                        uint64_t tr_high;
-                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
-                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
-                            assert(mb_y || linesize <= block_offset[i]);
-                            if(!topright_avail){
-                                if (pixel_shift) {
-                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
-                                    topright= (uint8_t*) &tr_high;
-                                } else {
-                                    tr= ptr[3 - linesize]*0x01010101;
-                                    topright= (uint8_t*) &tr;
-                                }
-                            }else
-                                topright= ptr + (4 << pixel_shift) - linesize;
-                        }else
-                            topright= NULL;
-
-                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
-                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
-                        if(nnz){
-                            if(is_h264){
-                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
-                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
-                                else
-                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
-                            }else
-                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
-                        }
-                    }
-                }
-            }
-        }
-    }else{
-        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
-        if(is_h264){
-            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
-                if(!transform_bypass)
-                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
-                else{
-                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
-                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
-                    for(i = 0; i < 16; i++)
-                        dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
-                }
-            }
-        }else
-            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
-    }
-}
-
-static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
-                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
-{
-    MpegEncContext * const s = &h->s;
-    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
-    int i;
-    block_offset += 16*p;
-    if(!IS_INTRA4x4(mb_type)){
-        if(is_h264){
-            if(IS_INTRA16x16(mb_type)){
-                if(transform_bypass){
-                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
-                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
-                    }else{
-                        for(i=0; i<16; i++){
-                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16))
-                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
-                        }
-                    }
-                }else{
-                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
-                }
-            }else if(h->cbp&15){
-                if(transform_bypass){
-                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
-                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
-                    for(i=0; i<16; i+=di){
-                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
-                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
-                        }
-                    }
-                }else{
-                    if(IS_8x8DCT(mb_type)){
-                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
-                    }else{
-                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
-                    }
-                }
-            }
-        }else{
-            for(i=0; i<16; i++){
-                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
-                    uint8_t * const ptr= dest_y + block_offset[i];
-                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
-                }
-            }
-        }
-    }
-}
-
 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mb_x= s->mb_x;
@@ -1787,12 +1546,13 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
     const int mb_type= s->current_picture.mb_type[mb_xy];
     uint8_t  *dest_y, *dest_cb, *dest_cr;
     int linesize, uvlinesize /*dct_offset*/;
-    int i, j;
+    int i;
     int *block_offset = &h->block_offset[0];
     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
     /* is_h264 should always be true if SVQ3 is disabled. */
     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
 
     dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
     dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
@@ -1806,7 +1566,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
     if (!simple && MB_FIELD) {
         linesize   = h->mb_linesize   = s->linesize * 2;
         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
-        block_offset = &h->block_offset[48];
+        block_offset = &h->block_offset[24];
         if(mb_y&1){ //FIXME move out of this function?
             dest_y -= s->linesize*15;
             dest_cb-= s->uvlinesize*7;
@@ -1847,93 +1607,216 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
                 for (j = 0; j < 16; j++)
                     tmp_y[j] = get_bits(&gb, bit_depth);
             }
-            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-                for (i = 0; i < 8; i++) {
-                    uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
-                    for (j = 0; j < 8; j++)
-                        tmp_cb[j] = get_bits(&gb, bit_depth);
-                }
-                for (i = 0; i < 8; i++) {
-                    uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
-                    for (j = 0; j < 8; j++)
-                        tmp_cr[j] = get_bits(&gb, bit_depth);
-                }
+            for (i = 0; i < 8; i++) {
+                uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
+                for (j = 0; j < 8; j++)
+                    tmp_cb[j] = get_bits(&gb, bit_depth);
+            }
+            for (i = 0; i < 8; i++) {
+                uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
+                for (j = 0; j < 8; j++)
+                    tmp_cr[j] = get_bits(&gb, bit_depth);
             }
         } else {
-            for (i=0; i<16; i++) {
-                memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
-            }
-            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-                for (i=0; i<8; i++) {
-                    memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
-                    memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
-                }
-            }
+        for (i=0; i<16; i++) {
+            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
+        }
+        for (i=0; i<8; i++) {
+            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
+            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
+        }
         }
     } else {
         if(IS_INTRA(mb_type)){
             if(h->deblocking_filter)
-                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift);
 
             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
             }
 
-            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
+            if(IS_INTRA4x4(mb_type)){
+                if(simple || !s->encoding){
+                    if(IS_8x8DCT(mb_type)){
+                        if(transform_bypass){
+                            idct_dc_add =
+                            idct_add    = s->dsp.add_pixels8;
+                        }else{
+                            idct_dc_add = h->h264dsp.h264_idct8_dc_add;
+                            idct_add    = h->h264dsp.h264_idct8_add;
+                        }
+                        for(i=0; i<16; i+=4){
+                            uint8_t * const ptr= dest_y + block_offset[i];
+                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
+                                h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
+                            }else{
+                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
+                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
+                                                            (h->topright_samples_available<<i)&0x4000, linesize);
+                                if(nnz){
+                                    if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
+                                        idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
+                                    else
+                                        idct_add   (ptr, h->mb + (i*16 << pixel_shift), linesize);
+                                }
+                            }
+                        }
+                    }else{
+                        if(transform_bypass){
+                            idct_dc_add =
+                            idct_add    = s->dsp.add_pixels4;
+                        }else{
+                            idct_dc_add = h->h264dsp.h264_idct_dc_add;
+                            idct_add    = h->h264dsp.h264_idct_add;
+                        }
+                        for(i=0; i<16; i++){
+                            uint8_t * const ptr= dest_y + block_offset[i];
+                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
 
+                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
+                                h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
+                            }else{
+                                uint8_t *topright;
+                                int nnz, tr;
+                                uint64_t tr_high;
+                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
+                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
+                                    assert(mb_y || linesize <= block_offset[i]);
+                                    if(!topright_avail){
+                                        if (pixel_shift) {
+                                            tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
+                                            topright= (uint8_t*) &tr_high;
+                                        } else {
+                                        tr= ptr[3 - linesize]*0x01010101;
+                                        topright= (uint8_t*) &tr;
+                                        }
+                                    }else
+                                        topright= ptr + (4 << pixel_shift) - linesize;
+                                }else
+                                    topright= NULL;
+
+                                h->hpc.pred4x4[ dir ](ptr, topright, linesize);
+                                nnz = h->non_zero_count_cache[ scan8[i] ];
+                                if(nnz){
+                                    if(is_h264){
+                                        if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
+                                            idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
+                                        else
+                                            idct_add   (ptr, h->mb + (i*16 << pixel_shift), linesize);
+                                    }else
+                                        ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
+                                }
+                            }
+                        }
+                    }
+                }
+            }else{
+                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
+                if(is_h264){
+                    if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
+                        if(!transform_bypass)
+                            h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]);
+                        else{
+                            static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
+                                                                    8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
+                            for(i = 0; i < 16; i++)
+                                dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i));
+                        }
+                    }
+                }else
+                    ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
+            }
             if(h->deblocking_filter)
-                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
         }else if(is_h264){
             if (pixel_shift) {
                 hl_motion_16(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
-                             h->h264dsp.biweight_h264_pixels_tab, 0);
+                             h->h264dsp.biweight_h264_pixels_tab);
             } else
                 hl_motion_8(h, dest_y, dest_cb, dest_cr,
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
-                            h->h264dsp.biweight_h264_pixels_tab, 0);
+                            h->h264dsp.biweight_h264_pixels_tab);
         }
 
-        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
+
+        if(!IS_INTRA4x4(mb_type)){
+            if(is_h264){
+                if(IS_INTRA16x16(mb_type)){
+                    if(transform_bypass){
+                        if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
+                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
+                        }else{
+                            for(i=0; i<16; i++){
+                                if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
+                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
+                            }
+                        }
+                    }else{
+                         h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+                    }
+                }else if(h->cbp&15){
+                    if(transform_bypass){
+                        const int di = IS_8x8DCT(mb_type) ? 4 : 1;
+                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
+                        for(i=0; i<16; i+=di){
+                            if(h->non_zero_count_cache[ scan8[i] ]){
+                                idct_add(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
+                            }
+                        }
+                    }else{
+                        if(IS_8x8DCT(mb_type)){
+                            h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+                        }else{
+                            h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+                        }
+                    }
+                }
+            }else{
+                for(i=0; i<16; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
+                        uint8_t * const ptr= dest_y + block_offset[i];
+                        ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
+                    }
+                }
+            }
+        }
 
         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
             uint8_t *dest[2] = {dest_cb, dest_cr};
             if(transform_bypass){
                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
-                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
-                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
+                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize);
+                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize);
                 }else{
                     idct_add = s->dsp.add_pixels4;
-                    for(j=1; j<3; j++){
-                        for(i=j*16; i<j*16+4; i++){
-                            if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
-                                idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
-                        }
+                    for(i=16; i<16+8; i++){
+                        if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
+                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
                     }
                 }
             }else{
                 if(is_h264){
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift)       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     h->h264dsp.h264_idct_add8(dest, block_offset,
                                               h->mb, uvlinesize,
                                               h->non_zero_count_cache);
                 }else{
-                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
-                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
-                    for(j=1; j<3; j++){
-                        for(i=j*16; i<j*16+4; i++){
-                            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                                uint8_t * const ptr= dest[j-1] + block_offset[i];
-                                ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
-                            }
+                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16     , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                    for(i=16; i<16+8; i++){
+                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                            uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
+                            ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                         }
                     }
                 }
@@ -1941,113 +1824,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
         }
     }
     if(h->cbp || IS_INTRA(mb_type))
-    {
         s->dsp.clear_blocks(h->mb);
-        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
-    }
-}
-
-static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
-    MpegEncContext * const s = &h->s;
-    const int mb_x= s->mb_x;
-    const int mb_y= s->mb_y;
-    const int mb_xy= h->mb_xy;
-    const int mb_type= s->current_picture.mb_type[mb_xy];
-    uint8_t  *dest[3];
-    int linesize;
-    int i, j, p;
-    int *block_offset = &h->block_offset[0];
-    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
-    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;
-
-    for (p = 0; p < plane_count; p++)
-    {
-        dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
-        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
-    }
-
-    h->list_counts[mb_xy]= h->list_count;
-
-    if (!simple && MB_FIELD) {
-        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
-        block_offset = &h->block_offset[48];
-        if(mb_y&1) //FIXME move out of this function?
-            for (p = 0; p < 3; p++)
-                dest[p] -= s->linesize*15;
-        if(FRAME_MBAFF) {
-            int list;
-            for(list=0; list<h->list_count; list++){
-                if(!USES_LIST(mb_type, list))
-                    continue;
-                if(IS_16X16(mb_type)){
-                    int8_t *ref = &h->ref_cache[list][scan8[0]];
-                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
-                }else{
-                    for(i=0; i<16; i+=4){
-                        int ref = h->ref_cache[list][scan8[i]];
-                        if(ref >= 0)
-                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
-                    }
-                }
-            }
-        }
-    } else {
-        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize;
-    }
-
-    if (!simple && IS_INTRA_PCM(mb_type)) {
-        if (pixel_shift) {
-            const int bit_depth = h->sps.bit_depth_luma;
-            GetBitContext gb;
-            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);
-
-            for (p = 0; p < plane_count; p++) {
-                for (i = 0; i < 16; i++) {
-                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
-                    for (j = 0; j < 16; j++)
-                        tmp[j] = get_bits(&gb, bit_depth);
-                }
-            }
-        } else {
-            for (p = 0; p < plane_count; p++) {
-                for (i = 0; i < 16; i++) {
-                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
-                }
-            }
-        }
-    } else {
-        if(IS_INTRA(mb_type)){
-            if(h->deblocking_filter)
-                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);
-
-            for (p = 0; p < plane_count; p++)
-                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
-
-            if(h->deblocking_filter)
-                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
-        }else{
-            if (pixel_shift) {
-                hl_motion_16(h, dest[0], dest[1], dest[2],
-                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
-                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
-                             h->h264dsp.weight_h264_pixels_tab,
-                             h->h264dsp.biweight_h264_pixels_tab, 1);
-            } else
-                hl_motion_8(h, dest[0], dest[1], dest[2],
-                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
-                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
-                            h->h264dsp.weight_h264_pixels_tab,
-                            h->h264dsp.biweight_h264_pixels_tab, 1);
-        }
-
-        for (p = 0; p < plane_count; p++)
-            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
-    }
-    if(h->cbp || IS_INTRA(mb_type))
-    {
-        s->dsp.clear_blocks(h->mb);
-        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
-    }
 }
 
 /**
@@ -2067,26 +1844,13 @@ static void av_noinline hl_decode_mb_complex(H264Context *h){
     hl_decode_mb_internal(h, 0, h->pixel_shift);
 }
 
-static void av_noinline hl_decode_mb_444_complex(H264Context *h){
-    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
-}
-
-static void av_noinline hl_decode_mb_444_simple(H264Context *h){
-    hl_decode_mb_444_internal(h, 1, 0);
-}
-
 void ff_h264_hl_decode_mb(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
 
-    if (CHROMA444) {
-        if(is_complex || h->pixel_shift)
-            hl_decode_mb_444_complex(h);
-        else
-            hl_decode_mb_444_simple(h);
-    } else if (is_complex) {
+    if (is_complex) {
         hl_decode_mb_complex(h);
     } else if (h->pixel_shift) {
         hl_decode_mb_simple_16(h);
@@ -2102,7 +1866,7 @@ static int pred_weight_table(H264Context *h){
     h->use_weight= 0;
     h->use_weight_chroma= 0;
     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
-    if(h->sps.chroma_format_idc)
+    if(CHROMA)
         h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
     luma_def = 1<<h->luma_log2_weight_denom;
     chroma_def = 1<<h->chroma_log2_weight_denom;
@@ -2127,7 +1891,7 @@ static int pred_weight_table(H264Context *h){
                 h->luma_weight[i][list][1]= 0;
             }
 
-            if(h->sps.chroma_format_idc){
+            if(CHROMA){
                 chroma_weight_flag= get_bits1(&s->gb);
                 if(chroma_weight_flag){
                     int j;
@@ -2557,11 +2321,11 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
 
     h->b_stride=  s->mb_width*4;
 
-    s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
+    s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
     if(h->sps.frame_mbs_only_flag)
-        s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
+        s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
     else
-        s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
+        s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 7);
 
     if (s->context_initialized
         && (   s->width != s->avctx->width || s->height != s->avctx->height
@@ -2606,22 +2370,18 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
 
         switch (h->sps.bit_depth_luma) {
             case 9 :
-                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
+                s->avctx->pix_fmt = PIX_FMT_YUV420P9;
                 break;
             case 10 :
-                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
+                s->avctx->pix_fmt = PIX_FMT_YUV420P10;
                 break;
             default:
-                if (CHROMA444){
-                    s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
-                }else{
-                    s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
-                                                             s->avctx->codec->pix_fmts ?
-                                                             s->avctx->codec->pix_fmts :
-                                                             s->avctx->color_range == AVCOL_RANGE_JPEG ?
-                                                             hwaccel_pixfmt_list_h264_jpeg_420 :
-                                                             ff_hwaccel_pixfmt_list_420);
-                }
+        s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
+                                                 s->avctx->codec->pix_fmts ?
+                                                 s->avctx->codec->pix_fmts :
+                                                 s->avctx->color_range == AVCOL_RANGE_JPEG ?
+                                                 hwaccel_pixfmt_list_h264_jpeg_420 :
+                                                 ff_hwaccel_pixfmt_list_420);
         }
 
         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
@@ -3113,10 +2873,11 @@ static int fill_filter_caches(H264Context *h, int mb_type){
     if(IS_INTRA(mb_type))
         return 0;
 
-    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
-    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
-    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
-    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);
+    AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
+    AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
+    AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
+    AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
+    AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
 
     h->cbp= h->cbp_table[mb_xy];
 
@@ -3168,45 +2929,45 @@ static int fill_filter_caches(H264Context *h, int mb_type){
 */
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
-        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
+        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
     }
 
     if(left_type[0]){
-        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
-        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
-        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
-        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
+        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
+        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
+        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
+        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
     }
 
     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
     if(!CABAC && h->pps.transform_8x8_mode){
         if(IS_8x8DCT(top_type)){
             h->non_zero_count_cache[4+8*0]=
-            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
+            h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
             h->non_zero_count_cache[6+8*0]=
-            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
+            h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
         }
         if(IS_8x8DCT(left_type[0])){
             h->non_zero_count_cache[3+8*1]=
-            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
+            h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
         }
         if(IS_8x8DCT(left_type[1])){
             h->non_zero_count_cache[3+8*3]=
-            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
+            h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
         }
 
         if(IS_8x8DCT(mb_type)){
             h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
-            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
+            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
 
             h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
-            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
+            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
 
             h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
-            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
+            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
 
             h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
-            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
+            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
         }
     }
 
@@ -3280,8 +3041,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
                 s->mb_x= mb_x;
                 s->mb_y= mb_y;
                 dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
-                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
-                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
+                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
+                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
                     //FIXME simplify above
 
                 if (MB_FIELD) {
@@ -3296,7 +3057,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
                     linesize   = h->mb_linesize   = s->linesize;
                     uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                 }
-                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
+                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
                 if(fill_filter_caches(h, mb_type))
                     continue;
                 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 3abf895010..8c4f1ab21a 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -39,6 +39,9 @@
 #define interlaced_dct interlaced_dct_is_a_bad_name
 #define mb_intra mb_intra_is_not_initialized_see_mb_type
 
+#define LUMA_DC_BLOCK_INDEX   24
+#define CHROMA_DC_BLOCK_INDEX 25
+
 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
 #define COEFF_TOKEN_VLC_BITS           8
 #define TOTAL_ZEROS_VLC_BITS           9
@@ -57,6 +60,8 @@
  * of progressive decoding by about 2%. */
 #define ALLOW_INTERLACE
 
+#define ALLOW_NOCHROMA
+
 #define FMO 0
 
 /**
@@ -80,12 +85,16 @@
 #endif
 #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
 
+#ifdef ALLOW_NOCHROMA
+#define CHROMA h->sps.chroma_format_idc
+#else
+#define CHROMA 1
+#endif
+
 #ifndef CABAC
 #define CABAC h->pps.cabac
 #endif
 
-#define CHROMA444 (h->sps.chroma_format_idc == 3)
-
 #define EXTENDED_SAR          255
 
 #define MB_TYPE_REF0       MB_TYPE_ACPRED //dirty but it fits in 16 bit
@@ -189,7 +198,7 @@ typedef struct SPS{
     int num_reorder_frames;
     int scaling_matrix_present;
     uint8_t scaling_matrix4[6][16];
-    uint8_t scaling_matrix8[6][64];
+    uint8_t scaling_matrix8[2][64];
     int nal_hrd_parameters_present_flag;
     int vcl_hrd_parameters_present_flag;
     int pic_struct_present_flag;
@@ -224,7 +233,7 @@ typedef struct PPS{
     int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
     int transform_8x8_mode;     ///< transform_8x8_mode_flag
     uint8_t scaling_matrix4[6][16];
-    uint8_t scaling_matrix8[6][64];
+    uint8_t scaling_matrix8[2][64];
     uint8_t chroma_qp_table[2][64];  ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
     int chroma_qp_diff;
 }PPS;
@@ -289,15 +298,21 @@ typedef struct H264Context{
     unsigned int top_samples_available;
     unsigned int topright_samples_available;
     unsigned int left_samples_available;
-    uint8_t (*top_borders[2])[(16*3)*2];
+    uint8_t (*top_borders[2])[(16+2*8)*2];
 
     /**
      * non zero coeff count cache.
      * is 64 if not available.
      */
-    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15*8];
+    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8];
 
-    uint8_t (*non_zero_count)[48];
+    /*
+    .UU.YYYY
+    .UU.YYYY
+    .vv.YYYY
+    .VV.YYYY
+    */
+    uint8_t (*non_zero_count)[32];
 
     /**
      * Motion vector cache.
@@ -321,7 +336,7 @@ typedef struct H264Context{
      * block_offset[ 0..23] for frame macroblocks
      * block_offset[24..47] for field macroblocks
      */
-    int block_offset[2*(16*3)];
+    int block_offset[2*(16+8)];
 
     uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
     uint32_t *mb2br_xy;
@@ -341,9 +356,9 @@ typedef struct H264Context{
     PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
 
     uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down?
-    uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64];
+    uint32_t dequant8_buffer[2][QP_MAX_NUM+1][64];
     uint32_t (*dequant4_coeff[6])[16];
-    uint32_t (*dequant8_coeff[6])[64];
+    uint32_t (*dequant8_coeff[2])[64];
 
     int slice_num;
     uint16_t *slice_table;     ///< slice_table_base + 2*mb_stride + 1
@@ -393,15 +408,15 @@ typedef struct H264Context{
     GetBitContext *intra_gb_ptr;
     GetBitContext *inter_gb_ptr;
 
-    DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
-    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2];
+    DECLARE_ALIGNED(16, DCTELEM, mb)[16*24*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
+    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16*2];
     DCTELEM mb_padding[256*2];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
 
     /**
      * Cabac
      */
     CABACContext cabac;
-    uint8_t      cabac_state[1024];
+    uint8_t      cabac_state[460];
 
     /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
     uint16_t     *cbp_table;
@@ -706,43 +721,27 @@ o-o o-o
 */
 
 /* Scan8 organization:
- *    0 1 2 3 4 5 6 7
- * 0  DY    y y y y y
- * 1        y Y Y Y Y
- * 2        y Y Y Y Y
- * 3        y Y Y Y Y
- * 4        y Y Y Y Y
- * 5  DU    u u u u u
- * 6        u U U U U
- * 7        u U U U U
- * 8        u U U U U
- * 9        u U U U U
- * 10 DV    v v v v v
- * 11       v V V V V
- * 12       v V V V V
- * 13       v V V V V
- * 14       v V V V V
+ *   0 1 2 3 4 5 6 7
+ * 0   u u y y y y y
+ * 1 u U U y Y Y Y Y
+ * 2 u U U y Y Y Y Y
+ * 3   v v y Y Y Y Y
+ * 4 v V V y Y Y Y Y
+ * 5 v V V   DYDUDV
  * DY/DU/DV are for luma/chroma DC.
  */
 
-#define LUMA_DC_BLOCK_INDEX   48
-#define CHROMA_DC_BLOCK_INDEX 49
-
 //This table must be here because scan8[constant] must be known at compiletime
-static const uint8_t scan8[16*3 + 3]={
- 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
- 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
- 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
- 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
- 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
- 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
- 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
- 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
- 4+11*8, 5+11*8, 4+12*8, 5+12*8,
- 6+11*8, 7+11*8, 6+12*8, 7+12*8,
- 4+13*8, 5+13*8, 4+14*8, 5+14*8,
- 6+13*8, 7+13*8, 6+14*8, 7+14*8,
- 0+ 0*8, 0+ 5*8, 0+10*8
+static const uint8_t scan8[16 + 2*4 + 3]={
+ 4+1*8, 5+1*8, 4+2*8, 5+2*8,
+ 6+1*8, 7+1*8, 6+2*8, 7+2*8,
+ 4+3*8, 5+3*8, 4+4*8, 5+4*8,
+ 6+3*8, 7+3*8, 6+4*8, 7+4*8,
+ 1+1*8, 2+1*8,
+ 1+2*8, 2+2*8,
+ 1+4*8, 2+4*8,
+ 1+5*8, 2+5*8,
+ 4+5*8, 5+5*8, 6+5*8
 };
 
 static av_always_inline uint32_t pack16to32(int a, int b){
@@ -774,11 +773,11 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     int topleft_xy, top_xy, topright_xy, left_xy[2];
-    static const uint8_t left_block_options[4][32]={
-        {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4},
-        {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4},
-        {0,0,1,1,7,10,7,10,3+0*4, 3+0*4, 3+1*4, 3+1*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4},
-        {0,2,0,2,7,10,7,10,3+0*4, 3+2*4, 3+0*4, 3+2*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4}
+    static const uint8_t left_block_options[4][16]={
+        {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8},
+        {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},
+        {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8},
+        {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
     };
 
     h->topleft_partition= -1;
@@ -948,41 +947,32 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 */
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
-        AV_COPY32(&h->non_zero_count_cache[4+8* 0], &h->non_zero_count[top_xy][4*3]);
-        if(CHROMA444){
-            AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 7]);
-            AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4*11]);
-        }else{
-            AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 5]);
-            AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4* 9]);
-        }
-    }else{
-        uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
-        AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
-        AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
-        AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
+        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
+            h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
+            h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];
+
+            h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8];
+            h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8];
+    }else {
+            h->non_zero_count_cache[1+8*0]=
+            h->non_zero_count_cache[2+8*0]=
+
+            h->non_zero_count_cache[1+8*3]=
+            h->non_zero_count_cache[2+8*3]=
+            AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040);
     }
 
     for (i=0; i<2; i++) {
         if(left_type[i]){
-            h->non_zero_count_cache[3+8* 1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
-            h->non_zero_count_cache[3+8* 2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
-            if(CHROMA444){
-                h->non_zero_count_cache[3+8* 6 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+4*4];
-                h->non_zero_count_cache[3+8* 7 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+4*4];
-                h->non_zero_count_cache[3+8*11 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+8*4];
-                h->non_zero_count_cache[3+8*12 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+8*4];
-            }else{
-                h->non_zero_count_cache[3+8* 6 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
-                h->non_zero_count_cache[3+8*11 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
-            }
+            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
+            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
+                h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
+                h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
         }else{
-            h->non_zero_count_cache[3+8* 1 + 2*8*i]=
-            h->non_zero_count_cache[3+8* 2 + 2*8*i]=
-            h->non_zero_count_cache[3+8* 6 + 2*8*i]=
-            h->non_zero_count_cache[3+8* 7 + 2*8*i]=
-            h->non_zero_count_cache[3+8*11 + 2*8*i]=
-            h->non_zero_count_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
+                h->non_zero_count_cache[3+8*1 + 2*8*i]=
+                h->non_zero_count_cache[3+8*2 + 2*8*i]=
+                h->non_zero_count_cache[0+8*1 +   8*i]=
+                h->non_zero_count_cache[0+8*4 +   8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
         }
     }
 
@@ -991,15 +981,15 @@ static void fill_decode_caches(H264Context *h, int mb_type){
         if(top_type) {
             h->top_cbp = h->cbp_table[top_xy];
         } else {
-            h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
+            h->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
         }
         // left_cbp
         if (left_type[0]) {
-            h->left_cbp =   (h->cbp_table[left_xy[0]] & 0x7F0)
+            h->left_cbp = (h->cbp_table[left_xy[0]] & 0x1f0)
                         |  ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2)
                         | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2);
         } else {
-            h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
+            h->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
         }
     }
     }
@@ -1200,21 +1190,11 @@ static inline int pred_intra_mode(H264Context *h, int n){
 static inline void write_back_non_zero_count(H264Context *h){
     const int mb_xy= h->mb_xy;
 
-    AV_COPY32(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[4+8* 1]);
-    AV_COPY32(&h->non_zero_count[mb_xy][ 4], &h->non_zero_count_cache[4+8* 2]);
-    AV_COPY32(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[4+8* 3]);
-    AV_COPY32(&h->non_zero_count[mb_xy][12], &h->non_zero_count_cache[4+8* 4]);
-    AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[4+8* 6]);
-    AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8* 7]);
-    AV_COPY32(&h->non_zero_count[mb_xy][32], &h->non_zero_count_cache[4+8*11]);
-    AV_COPY32(&h->non_zero_count[mb_xy][36], &h->non_zero_count_cache[4+8*12]);
-
-    if(CHROMA444){
-        AV_COPY32(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[4+8* 8]);
-        AV_COPY32(&h->non_zero_count[mb_xy][28], &h->non_zero_count_cache[4+8* 9]);
-        AV_COPY32(&h->non_zero_count[mb_xy][40], &h->non_zero_count_cache[4+8*13]);
-        AV_COPY32(&h->non_zero_count[mb_xy][44], &h->non_zero_count_cache[4+8*14]);
-    }
+    AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]);
+    AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]);
+    AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]);
+    AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]);
+    AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]);
 }
 
 static inline void write_back_motion(H264Context *h, int mb_type){
@@ -1287,7 +1267,8 @@ static void av_unused decode_mb_skip(H264Context *h){
     const int mb_xy= h->mb_xy;
     int mb_type=0;
 
-    memset(h->non_zero_count[mb_xy], 0, 48);
+    memset(h->non_zero_count[mb_xy], 0, 32);
+    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
 
     if(MB_FIELD)
         mb_type|= MB_TYPE_INTERLACED;
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index f30f4e1c9c..69af1e2ded 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -45,7 +45,7 @@
 
 /* Cabac pre state table */
 
-static const int8_t cabac_context_init_I[1024][2] =
+static const int8_t cabac_context_init_I[460][2] =
 {
     /* 0 - 10 */
     { 20, -15 }, {  2, 54 },  {  3,  74 }, { 20, -15 },
@@ -211,153 +211,10 @@ static const int8_t cabac_context_init_I[1024][2] =
     { -10,  73 }, { -10,  70 }, { -10,  69 }, {  -5,  66 },
     {  -9,  64 }, {  -5,  58 }, {   2,  59 }, {  21, -10 },
     {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
-    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 },
-
-    /* 460 -> 1024 */
-    { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
-    { -12,  63 }, {  -2,  68 }, { -15,  84 }, { -13, 104 },
-    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
-    { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
-    { -12,  63 }, {  -2,  68 }, { -15,  84 }, { -13, 104 },
-    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
-    {  -7,  93 }, { -11,  87 }, {  -3,  77 }, {  -5,  71 },
-    {  -4,  63 }, {  -4,  68 }, { -12,  84 }, {  -7,  62 },
-    {  -7,  65 }, {   8,  61 }, {   5,  56 }, {  -2,  66 },
-    {   1,  64 }, {   0,  61 }, {  -2,  78 }, {   1,  50 },
-    {   7,  52 }, {  10,  35 }, {   0,  44 }, {  11,  38 },
-    {   1,  45 }, {   0,  46 }, {   5,  44 }, {  31,  17 },
-    {   1,  51 }, {   7,  50 }, {  28,  19 }, {  16,  33 },
-    {  14,  62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
-    { -13,  91 }, { -12,  94 }, { -10,  88 }, { -16,  84 },
-    { -10,  86 }, {  -7,  83 }, { -13,  87 }, { -19,  94 },
-    {   1,  70 }, {   0,  72 }, {  -5,  74 }, {  18,  59 },
-    {  -7,  93 }, { -11,  87 }, {  -3,  77 }, {  -5,  71 },
-    {  -4,  63 }, {  -4,  68 }, { -12,  84 }, {  -7,  62 },
-    {  -7,  65 }, {   8,  61 }, {   5,  56 }, {  -2,  66 },
-    {   1,  64 }, {   0,  61 }, {  -2,  78 }, {   1,  50 },
-    {   7,  52 }, {  10,  35 }, {   0,  44 }, {  11,  38 },
-    {   1,  45 }, {   0,  46 }, {   5,  44 }, {  31,  17 },
-    {   1,  51 }, {   7,  50 }, {  28,  19 }, {  16,  33 },
-    {  14,  62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
-    { -13,  91 }, { -12,  94 }, { -10,  88 }, { -16,  84 },
-    { -10,  86 }, {  -7,  83 }, { -13,  87 }, { -19,  94 },
-    {   1,  70 }, {   0,  72 }, {  -5,  74 }, {  18,  59 },
-    {  24,   0 }, {  15,   9 }, {   8,  25 }, {  13,  18 },
-    {  15,   9 }, {  13,  19 }, {  10,  37 }, {  12,  18 },
-    {   6,  29 }, {  20,  33 }, {  15,  30 }, {   4,  45 },
-    {   1,  58 }, {   0,  62 }, {   7,  61 }, {  12,  38 },
-    {  11,  45 }, {  15,  39 }, {  11,  42 }, {  13,  44 },
-    {  16,  45 }, {  12,  41 }, {  10,  49 }, {  30,  34 },
-    {  18,  42 }, {  10,  55 }, {  17,  51 }, {  17,  46 },
-    {   0,  89 }, {  26, -19 }, {  22, -17 }, {  26, -17 },
-    {  30, -25 }, {  28, -20 }, {  33, -23 }, {  37, -27 },
-    {  33, -23 }, {  40, -28 }, {  38, -17 }, {  33, -11 },
-    {  40, -15 }, {  41,  -6 }, {  38,   1 }, {  41,  17 },
-    {  24,   0 }, {  15,   9 }, {   8,  25 }, {  13,  18 },
-    {  15,   9 }, {  13,  19 }, {  10,  37 }, {  12,  18 },
-    {   6,  29 }, {  20,  33 }, {  15,  30 }, {   4,  45 },
-    {   1,  58 }, {   0,  62 }, {   7,  61 }, {  12,  38 },
-    {  11,  45 }, {  15,  39 }, {  11,  42 }, {  13,  44 },
-    {  16,  45 }, {  12,  41 }, {  10,  49 }, {  30,  34 },
-    {  18,  42 }, {  10,  55 }, {  17,  51 }, {  17,  46 },
-    {   0,  89 }, {  26, -19 }, {  22, -17 }, {  26, -17 },
-    {  30, -25 }, {  28, -20 }, {  33, -23 }, {  37, -27 },
-    {  33, -23 }, {  40, -28 }, {  38, -17 }, {  33, -11 },
-    {  40, -15 }, {  41,  -6 }, {  38,   1 }, {  41,  17 },
-    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
-    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
-    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
-    { -23,  68 }, { -24,  50 }, { -11,  74 }, { -14, 106 },
-    { -13,  97 }, { -15,  90 }, { -12,  90 }, { -18,  88 },
-    { -10,  73 }, {  -9,  79 }, { -14,  86 }, { -10,  73 },
-    { -10,  70 }, { -10,  69 }, {  -5,  66 }, {  -9,  64 },
-    {  -5,  58 }, {   2,  59 }, {  23, -13 }, {  26, -13 },
-    {  40, -15 }, {  49, -14 }, {  44,   3 }, {  45,   6 },
-    {  44,  34 }, {  33,  54 }, {  19,  82 }, {  21, -10 },
-    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
-    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 },
-    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
-    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
-    {   0,  68 }, {  -9,  92 }, { -17, 120 }, { -20, 112 },
-    { -18, 114 }, { -11,  85 }, { -15,  92 }, { -14,  89 },
-    { -26,  71 }, { -15,  81 }, { -14,  80 }, {   0,  68 },
-    { -14,  70 }, { -24,  56 }, { -23,  68 }, { -24,  50 },
-    { -11,  74 }, { -14, 106 }, { -13,  97 }, { -15,  90 },
-    { -12,  90 }, { -18,  88 }, { -10,  73 }, {  -9,  79 },
-    { -14,  86 }, { -10,  73 }, { -10,  70 }, { -10,  69 },
-    {  -5,  66 }, {  -9,  64 }, {  -5,  58 }, {   2,  59 },
-    {  23, -13 }, {  26, -13 }, {  40, -15 }, {  49, -14 },
-    {  44,   3 }, {  45,   6 }, {  44,  34 }, {  33,  54 },
-    {  19,  82 }, {  21, -10 }, {  24, -11 }, {  28,  -8 },
-    {  28,  -1 }, {  29,   3 }, {  29,   9 }, {  35,  20 },
-    {  29,  36 }, {  14,  67 }, {  -3,  75 }, {  -1,  23 },
-    {   1,  34 }, {   1,  43 }, {   0,  54 }, {  -2,  55 },
-    {   0,  61 }, {   1,  64 }, {   0,  68 }, {  -9,  92 },
-    {  -6,  93 }, {  -6,  84 }, {  -8,  79 }, {   0,  66 },
-    {  -1,  71 }, {   0,  62 }, {  -2,  60 }, {  -2,  59 },
-    {  -5,  75 }, {  -3,  62 }, {  -4,  58 }, {  -9,  66 },
-    {  -1,  79 }, {   0,  71 }, {   3,  68 }, {  10,  44 },
-    {  -7,  62 }, {  15,  36 }, {  14,  40 }, {  16,  27 },
-    {  12,  29 }, {   1,  44 }, {  20,  36 }, {  18,  32 },
-    {   5,  42 }, {   1,  48 }, {  10,  62 }, {  17,  46 },
-    {   9,  64 }, { -12, 104 }, { -11,  97 }, { -16,  96 },
-    {  -7,  88 }, {  -8,  85 }, {  -7,  85 }, {  -9,  85 },
-    { -13,  88 }, {   4,  66 }, {  -3,  77 }, {  -3,  76 },
-    {  -6,  76 }, {  10,  58 }, {  -1,  76 }, {  -1,  83 },
-    {  -6,  93 }, {  -6,  84 }, {  -8,  79 }, {   0,  66 },
-    {  -1,  71 }, {   0,  62 }, {  -2,  60 }, {  -2,  59 },
-    {  -5,  75 }, {  -3,  62 }, {  -4,  58 }, {  -9,  66 },
-    {  -1,  79 }, {   0,  71 }, {   3,  68 }, {  10,  44 },
-    {  -7,  62 }, {  15,  36 }, {  14,  40 }, {  16,  27 },
-    {  12,  29 }, {   1,  44 }, {  20,  36 }, {  18,  32 },
-    {   5,  42 }, {   1,  48 }, {  10,  62 }, {  17,  46 },
-    {   9,  64 }, { -12, 104 }, { -11,  97 }, { -16,  96 },
-    {  -7,  88 }, {  -8,  85 }, {  -7,  85 }, {  -9,  85 },
-    { -13,  88 }, {   4,  66 }, {  -3,  77 }, {  -3,  76 },
-    {  -6,  76 }, {  10,  58 }, {  -1,  76 }, {  -1,  83 },
-    {  15,   6 }, {   6,  19 }, {   7,  16 }, {  12,  14 },
-    {  18,  13 }, {  13,  11 }, {  13,  15 }, {  15,  16 },
-    {  12,  23 }, {  13,  23 }, {  15,  20 }, {  14,  26 },
-    {  14,  44 }, {  17,  40 }, {  17,  47 }, {  24,  17 },
-    {  21,  21 }, {  25,  22 }, {  31,  27 }, {  22,  29 },
-    {  19,  35 }, {  14,  50 }, {  10,  57 }, {   7,  63 },
-    {  -2,  77 }, {  -4,  82 }, {  -3,  94 }, {   9,  69 },
-    { -12, 109 }, {  36, -35 }, {  36, -34 }, {  32, -26 },
-    {  37, -30 }, {  44, -32 }, {  34, -18 }, {  34, -15 },
-    {  40, -15 }, {  33,  -7 }, {  35,  -5 }, {  33,   0 },
-    {  38,   2 }, {  33,  13 }, {  23,  35 }, {  13,  58 },
-    {  15,   6 }, {   6,  19 }, {   7,  16 }, {  12,  14 },
-    {  18,  13 }, {  13,  11 }, {  13,  15 }, {  15,  16 },
-    {  12,  23 }, {  13,  23 }, {  15,  20 }, {  14,  26 },
-    {  14,  44 }, {  17,  40 }, {  17,  47 }, {  24,  17 },
-    {  21,  21 }, {  25,  22 }, {  31,  27 }, {  22,  29 },
-    {  19,  35 }, {  14,  50 }, {  10,  57 }, {   7,  63 },
-    {  -2,  77 }, {  -4,  82 }, {  -3,  94 }, {   9,  69 },
-    { -12, 109 }, {  36, -35 }, {  36, -34 }, {  32, -26 },
-    {  37, -30 }, {  44, -32 }, {  34, -18 }, {  34, -15 },
-    {  40, -15 }, {  33,  -7 }, {  35,  -5 }, {  33,   0 },
-    {  38,   2 }, {  33,  13 }, {  23,  35 }, {  13,  58 },
-    {  -3,  71 }, {  -6,  42 }, {  -5,  50 }, {  -3,  54 },
-    {  -2,  62 }, {   0,  58 }, {   1,  63 }, {  -2,  72 },
-    {  -1,  74 }, {  -9,  91 }, {  -5,  67 }, {  -5,  27 },
-    {  -3,  39 }, {  -2,  44 }, {   0,  46 }, { -16,  64 },
-    {  -8,  68 }, { -10,  78 }, {  -6,  77 }, { -10,  86 },
-    { -12,  92 }, { -15,  55 }, { -10,  60 }, {  -6,  62 },
-    {  -4,  65 }, { -12,  73 }, {  -8,  76 }, {  -7,  80 },
-    {  -9,  88 }, { -17, 110 }, {  -3,  71 }, {  -6,  42 },
-    {  -5,  50 }, {  -3,  54 }, {  -2,  62 }, {   0,  58 },
-    {   1,  63 }, {  -2,  72 }, {  -1,  74 }, {  -9,  91 },
-    {  -5,  67 }, {  -5,  27 }, {  -3,  39 }, {  -2,  44 },
-    {   0,  46 }, { -16,  64 }, {  -8,  68 }, { -10,  78 },
-    {  -6,  77 }, { -10,  86 }, { -12,  92 }, { -15,  55 },
-    { -10,  60 }, {  -6,  62 }, {  -4,  65 }, { -12,  73 },
-    {  -8,  76 }, {  -7,  80 }, {  -9,  88 }, { -17, 110 },
-    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
-    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
-    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 }
+    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 }
 };
 
-static const int8_t cabac_context_init_PB[3][1024][2] =
+static const int8_t cabac_context_init_PB[3][460][2] =
 {
     /* i_cabac_init_idc == 0 */
     {
@@ -513,149 +370,6 @@ static const int8_t cabac_context_init_PB[3][1024][2] =
         { -14,  66 }, {   0,  59 }, {   2,  59 }, {  21, -13 },
         {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
         {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
-
-        /* 460 - 1024 */
-        {  -7,  92 }, {  -5,  89 }, {  -7,  96 }, { -13, 108 },
-        {  -3,  46 }, {  -1,  65 }, {  -1,  57 }, {  -9,  93 },
-        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
-        {  -7,  92 }, {  -5,  89 }, {  -7,  96 }, { -13, 108 },
-        {  -3,  46 }, {  -1,  65 }, {  -1,  57 }, {  -9,  93 },
-        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
-        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
-        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
-        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
-        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
-        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
-        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
-        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
-        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
-        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
-        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
-        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
-        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
-        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
-        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
-        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
-        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
-        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
-        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
-        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
-        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
-        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
-        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
-        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
-        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
-        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
-        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
-        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
-        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
-        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
-        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
-        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
-        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
-        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
-        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
-        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
-        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
-        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
-        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
-        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
-        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
-        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
-        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
-        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
-        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
-        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
-        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
-        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
-        { -16,  66 }, { -22,  65 }, { -20,  63 }, {  -5,  85 },
-        {  -6,  81 }, { -10,  77 }, {  -7,  81 }, { -17,  80 },
-        { -18,  73 }, {  -4,  74 }, { -10,  83 }, {  -9,  71 },
-        {  -9,  67 }, {  -1,  61 }, {  -8,  66 }, { -14,  66 },
-        {   0,  59 }, {   2,  59 }, {   9,  -2 }, {  26,  -9 },
-        {  33,  -9 }, {  39,  -7 }, {  41,  -2 }, {  45,   3 },
-        {  49,   9 }, {  45,  27 }, {  36,  59 }, {  21, -13 },
-        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
-        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
-        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
-        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
-        {  -8,  66 }, {  -8,  76 }, {  -4,  79 }, {  -7,  71 },
-        {  -5,  69 }, {  -9,  70 }, {  -8,  66 }, { -10,  68 },
-        { -19,  73 }, { -12,  69 }, { -16,  70 }, { -15,  67 },
-        { -20,  62 }, { -19,  70 }, { -16,  66 }, { -22,  65 },
-        { -20,  63 }, {  -5,  85 }, {  -6,  81 }, { -10,  77 },
-        {  -7,  81 }, { -17,  80 }, { -18,  73 }, {  -4,  74 },
-        { -10,  83 }, {  -9,  71 }, {  -9,  67 }, {  -1,  61 },
-        {  -8,  66 }, { -14,  66 }, {   0,  59 }, {   2,  59 },
-        {   9,  -2 }, {  26,  -9 }, {  33,  -9 }, {  39,  -7 },
-        {  41,  -2 }, {  45,   3 }, {  49,   9 }, {  45,  27 },
-        {  36,  59 }, {  21, -13 }, {  33, -14 }, {  39,  -7 },
-        {  46,  -2 }, {  51,   2 }, {  60,   6 }, {  61,  17 },
-        {  55,  34 }, {  42,  62 }, {  -6,  66 }, {  -7,  35 },
-        {  -7,  42 }, {  -8,  45 }, {  -5,  48 }, { -12,  56 },
-        {  -6,  60 }, {  -5,  62 }, {  -8,  66 }, {  -8,  76 },
-        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
-        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
-        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
-        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
-        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
-        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
-        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
-        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
-        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
-        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
-        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
-        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
-        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
-        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
-        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
-        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
-        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
-        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
-        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
-        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
-        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
-        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
-        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
-        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
-        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
-        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
-        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
-        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
-        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
-        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
-        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
-        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
-        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
-        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
-        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
-        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
-        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
-        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
-        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
-        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
-        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
-        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
-        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
-        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
-        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
-        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
-        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
-        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
-        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
-        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
-        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
-        {  -3,  74 }, { -10,  90 }, {  -6,  76 }, {  -2,  44 },
-        {   0,  45 }, {   0,  52 }, {  -3,  64 }, {  -2,  59 },
-        {  -4,  70 }, {  -4,  75 }, {  -8,  82 }, { -17, 102 },
-        {  -9,  77 }, {   3,  24 }, {   0,  42 }, {   0,  48 },
-        {   0,  55 }, {  -6,  59 }, {  -7,  71 }, { -12,  83 },
-        { -11,  87 }, { -30, 119 }, {   1,  58 }, {  -3,  29 },
-        {  -1,  36 }, {   1,  38 }, {   2,  43 }, {  -6,  55 },
-        {   0,  58 }, {   0,  64 }, {  -3,  74 }, { -10,  90 },
-        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
-        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
-        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 }
     },
 
     /* i_cabac_init_idc == 1 */
@@ -812,149 +526,6 @@ static const int8_t cabac_context_init_PB[3][1024][2] =
         {  -9,  60 }, {   1,  54 }, {   2,  58 }, {  17, -10 },
         {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
         {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
-
-        /* 460 - 1024 */
-        {   0,  80 }, {  -5,  89 }, {  -7,  94 }, {  -4,  92 },
-        {   0,  39 }, {   0,  65 }, { -15,  84 }, { -35, 127 },
-        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
-        {   0,  80 }, {  -5,  89 }, {  -7,  94 }, {  -4,  92 },
-        {   0,  39 }, {   0,  65 }, { -15,  84 }, { -35, 127 },
-        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
-        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
-        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
-        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
-        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
-        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
-        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
-        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
-        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
-        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
-        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
-        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
-        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
-        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
-        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
-        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
-        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
-        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
-        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
-        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
-        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
-        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
-        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
-        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
-        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
-        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
-        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
-        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
-        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
-        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
-        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
-        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
-        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
-        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
-        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
-        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
-        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
-        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
-        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
-        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
-        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
-        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
-        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
-        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
-        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
-        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
-        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
-        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
-        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  -3,  81 },
-        {  -3,  76 }, {  -7,  72 }, {  -6,  78 }, { -12,  72 },
-        { -14,  68 }, {  -3,  70 }, {  -6,  76 }, {  -5,  66 },
-        {  -5,  62 }, {   0,  57 }, {  -4,  61 }, {  -9,  60 },
-        {   1,  54 }, {   2,  58 }, {  17, -10 }, {  32, -13 },
-        {  42,  -9 }, {  49,  -5 }, {  53,   0 }, {  64,   3 },
-        {  68,  10 }, {  66,  27 }, {  47,  57 }, {  17, -10 },
-        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
-        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
-        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
-        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
-        {  -4,  67 }, {  -7,  82 }, {  -5,  85 }, {  -6,  81 },
-        { -10,  77 }, {  -7,  81 }, { -17,  80 }, { -18,  73 },
-        {  -4,  74 }, { -10,  83 }, {  -9,  71 }, {  -9,  67 },
-        {  -1,  61 }, {  -8,  66 }, { -14,  66 }, {   0,  59 },
-        {   2,  59 }, {  -3,  81 }, {  -3,  76 }, {  -7,  72 },
-        {  -6,  78 }, { -12,  72 }, { -14,  68 }, {  -3,  70 },
-        {  -6,  76 }, {  -5,  66 }, {  -5,  62 }, {   0,  57 },
-        {  -4,  61 }, {  -9,  60 }, {   1,  54 }, {   2,  58 },
-        {  17, -10 }, {  32, -13 }, {  42,  -9 }, {  49,  -5 },
-        {  53,   0 }, {  64,   3 }, {  68,  10 }, {  66,  27 },
-        {  47,  57 }, {  17, -10 }, {  32, -13 }, {  42,  -9 },
-        {  49,  -5 }, {  53,   0 }, {  64,   3 }, {  68,  10 },
-        {  66,  27 }, {  47,  57 }, {  -5,  71 }, {   0,  24 },
-        {  -1,  36 }, {  -2,  42 }, {  -2,  52 }, {  -9,  57 },
-        {  -6,  63 }, {  -4,  65 }, {  -4,  67 }, {  -7,  82 },
-        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
-        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
-        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
-        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
-        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
-        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
-        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
-        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
-        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
-        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
-        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
-        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
-        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
-        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
-        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
-        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
-        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
-        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
-        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
-        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
-        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
-        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
-        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
-        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
-        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
-        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
-        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
-        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
-        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
-        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
-        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
-        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
-        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
-        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
-        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
-        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
-        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
-        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
-        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
-        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
-        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
-        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
-        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
-        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
-        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
-        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
-        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
-        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
-        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
-        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
-        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
-        {  -5,  74 }, {  -9,  86 }, { -23, 112 }, { -15,  71 },
-        {  -7,  61 }, {   0,  53 }, {  -5,  66 }, { -11,  77 },
-        {  -9,  80 }, {  -9,  84 }, { -10,  87 }, { -34, 127 },
-        { -21, 101 }, {  -3,  39 }, {  -5,  53 }, {  -7,  61 },
-        { -11,  75 }, { -15,  77 }, { -17,  91 }, { -25, 107 },
-        { -25, 111 }, { -28, 122 }, { -11,  76 }, { -10,  44 },
-        { -10,  52 }, { -10,  57 }, {  -9,  58 }, { -16,  72 },
-        {  -7,  69 }, {  -4,  69 }, {  -5,  74 }, {  -9,  86 },
-        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
-        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
-        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 }
     },
 
     /* i_cabac_init_idc == 2 */
@@ -1111,149 +682,6 @@ static const int8_t cabac_context_init_PB[3][1024][2] =
         { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
         {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
         {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
-
-        /* 460 - 1024 */
-        {  11,  80 }, {   5,  76 }, {   2,  84 }, {   5,  78 },
-        {  -6,  55 }, {   4,  61 }, { -14,  83 }, { -37, 127 },
-        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
-        {  11,  80 }, {   5,  76 }, {   2,  84 }, {   5,  78 },
-        {  -6,  55 }, {   4,  61 }, { -14,  83 }, { -37, 127 },
-        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
-        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
-        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
-        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
-        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
-        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
-        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
-        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
-        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
-        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
-        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
-        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
-        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
-        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
-        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
-        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
-        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
-        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
-        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
-        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
-        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
-        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
-        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
-        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
-        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
-        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
-        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
-        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
-        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
-        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
-        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
-        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
-        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
-        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
-        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
-        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
-        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
-        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
-        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
-        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
-        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
-        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
-        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
-        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
-        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
-        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
-        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
-        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
-        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {  -3,  78 },
-        {  -8,  74 }, {  -9,  72 }, { -10,  72 }, { -18,  75 },
-        { -12,  71 }, { -11,  63 }, {  -5,  70 }, { -17,  75 },
-        { -14,  72 }, { -16,  67 }, {  -8,  53 }, { -14,  59 },
-        {  -9,  52 }, { -11,  68 }, {   9,  -2 }, {  30, -10 },
-        {  31,  -4 }, {  33,  -1 }, {  33,   7 }, {  31,  12 },
-        {  37,  23 }, {  31,  38 }, {  20,  64 }, {   9,  -2 },
-        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
-        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
-        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
-        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
-        {  -6,  68 }, { -10,  79 }, {  -3,  78 }, {  -8,  74 },
-        {  -9,  72 }, { -10,  72 }, { -18,  75 }, { -12,  71 },
-        { -11,  63 }, {  -5,  70 }, { -17,  75 }, { -14,  72 },
-        { -16,  67 }, {  -8,  53 }, { -14,  59 }, {  -9,  52 },
-        { -11,  68 }, {  -3,  78 }, {  -8,  74 }, {  -9,  72 },
-        { -10,  72 }, { -18,  75 }, { -12,  71 }, { -11,  63 },
-        {  -5,  70 }, { -17,  75 }, { -14,  72 }, { -16,  67 },
-        {  -8,  53 }, { -14,  59 }, {  -9,  52 }, { -11,  68 },
-        {   9,  -2 }, {  30, -10 }, {  31,  -4 }, {  33,  -1 },
-        {  33,   7 }, {  31,  12 }, {  37,  23 }, {  31,  38 },
-        {  20,  64 }, {   9,  -2 }, {  30, -10 }, {  31,  -4 },
-        {  33,  -1 }, {  33,   7 }, {  31,  12 }, {  37,  23 },
-        {  31,  38 }, {  20,  64 }, {  -9,  71 }, {  -7,  37 },
-        {  -8,  44 }, { -11,  49 }, { -10,  56 }, { -12,  59 },
-        {  -8,  63 }, {  -9,  67 }, {  -6,  68 }, { -10,  79 },
-        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
-        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
-        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
-        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
-        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
-        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
-        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
-        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
-        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
-        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
-        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
-        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
-        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
-        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
-        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
-        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
-        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
-        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
-        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
-        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
-        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
-        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
-        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
-        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
-        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
-        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
-        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
-        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
-        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
-        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
-        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
-        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
-        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
-        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
-        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
-        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
-        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
-        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
-        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
-        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
-        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
-        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
-        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
-        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
-        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
-        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
-        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
-        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
-        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
-        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
-        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
-        { -12,  92 }, { -18, 108 }, { -24, 115 }, { -22,  82 },
-        {  -9,  62 }, {   0,  53 }, {   0,  59 }, { -14,  85 },
-        { -13,  89 }, { -13,  94 }, { -11,  92 }, { -29, 127 },
-        { -21, 100 }, { -14,  57 }, { -12,  67 }, { -11,  71 },
-        { -10,  77 }, { -21,  85 }, { -16,  88 }, { -23, 104 },
-        { -15,  98 }, { -37, 127 }, { -10,  82 }, {  -8,  48 },
-        {  -8,  61 }, {  -8,  66 }, {  -7,  70 }, { -14,  75 },
-        { -10,  79 }, {  -9,  83 }, { -12,  92 }, { -18, 108 },
-        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
-        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
-        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 }
     }
 };
 
@@ -1267,7 +695,7 @@ void ff_h264_init_cabac_states(H264Context *h) {
     else                                 tab = cabac_context_init_PB[h->cabac_init_idc];
 
     /* calculate pre-state */
-    for( i= 0; i < 1024; i++ ) {
+    for( i= 0; i < 460; i++ ) {
         int pre = 2*(((tab[i][0] * slice_qp) >>4 ) + tab[i][1]) - 127;
 
         pre^= pre>>31;
@@ -1529,22 +957,21 @@ static int decode_cabac_mb_mvd( H264Context *h, int ctxbase, int amvd, int *mvda
     my += decode_cabac_mb_mvd( h, 47, amvd1, &mpy );\
 }
 
-static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int max_coeff, int is_dc ) {
+static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
     int nza, nzb;
     int ctx = 0;
-    static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020};
 
     if( is_dc ) {
-        if( cat == 3 ) {
+        if( cat == 0 ) {
+            nza = h->left_cbp&0x100;
+            nzb = h-> top_cbp&0x100;
+        } else {
             idx -= CHROMA_DC_BLOCK_INDEX;
             nza = (h->left_cbp>>(6+idx))&0x01;
             nzb = (h-> top_cbp>>(6+idx))&0x01;
-        } else {
-            idx -= LUMA_DC_BLOCK_INDEX;
-            nza = h->left_cbp&(0x100<<idx);
-            nzb = h-> top_cbp&(0x100<<idx);
         }
     } else {
+        assert(cat == 1 || cat == 2 || cat == 4);
         nza = h->non_zero_count_cache[scan8[idx] - 1];
         nzb = h->non_zero_count_cache[scan8[idx] - 8];
     }
@@ -1555,7 +982,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
     if( nzb > 0 )
         ctx += 2;
 
-    return base_ctx[cat] + ctx;
+    return ctx + 4 * cat;
 }
 
 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
@@ -1566,16 +993,16 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
 };
 
 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
-    static const int significant_coeff_flag_offset[2][14] = {
-      { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
-      { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
+    static const int significant_coeff_flag_offset[2][6] = {
+      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
+      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
     };
-    static const int last_coeff_flag_offset[2][14] = {
-      { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748 },
-      { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757 }
+    static const int last_coeff_flag_offset[2][6] = {
+      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
+      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
     };
-    static const int coeff_abs_level_m1_offset[14] = {
-        227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
+    static const int coeff_abs_level_m1_offset[6] = {
+        227+0, 227+10, 227+20, 227+30, 227+39, 426
     };
     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
@@ -1630,7 +1057,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
     abs_level_m1_ctx_base = h->cabac_state
         + coeff_abs_level_m1_offset[cat];
 
-    if( !is_dc && max_coeff == 64 ) {
+    if( !is_dc && cat == 5 ) {
 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
         for(last= 0; last < coefs; last++) { \
             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
@@ -1648,11 +1075,9 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
         }
         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
-        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
-                                                 last_coeff_ctx_base-significant_coeff_ctx_base, sig_off);
+        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
     } else {
-        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
-                                             last_coeff_ctx_base-significant_coeff_ctx_base);
+        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
 #else
         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
     } else {
@@ -1662,16 +1087,16 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
     assert(coeff_count > 0);
 
     if( is_dc ) {
-        if( cat == 3 )
-            h->cbp_table[h->mb_xy] |= 0x40 << (n - CHROMA_DC_BLOCK_INDEX);
+        if( cat == 0 )
+            h->cbp_table[h->mb_xy] |= 0x100;
         else
-            h->cbp_table[h->mb_xy] |= 0x100 << (n - LUMA_DC_BLOCK_INDEX);
+            h->cbp_table[h->mb_xy] |= 0x40 << (n - CHROMA_DC_BLOCK_INDEX);
         h->non_zero_count_cache[scan8[n]] = coeff_count;
     } else {
-        if( max_coeff == 64 )
+        if( cat == 5 )
             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
         else {
-            assert( cat == 1 || cat ==  2 || cat ==  4 || cat == 7 || cat == 8 || cat == 11 || cat == 12 );
+            assert( cat == 1 || cat == 2 || cat == 4 );
             h->non_zero_count_cache[scan8[n]] = coeff_count;
         }
     }
@@ -1754,7 +1179,7 @@ static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block
 
 static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
     /* read coded block flag */
-    if( get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 1 ) ] ) == 0 ) {
+    if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, 1 ) ] ) == 0 ) {
         h->non_zero_count_cache[scan8[n]] = 0;
         return;
     }
@@ -1763,68 +1188,13 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *
 
 static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
     /* read coded block flag */
-    if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
-        if( max_coeff == 64 ) {
-            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 0, 1);
-        } else {
-            h->non_zero_count_cache[scan8[n]] = 0;
-        }
+    if( cat != 5 && get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, 0 ) ] ) == 0 ) {
+        h->non_zero_count_cache[scan8[n]] = 0;
         return;
     }
     decode_cabac_residual_nondc_internal( h, block, cat, n, scantable, qmul, max_coeff );
 }
 
-static av_always_inline void decode_cabac_luma_residual( H264Context *h, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p )
-{
-    static const uint8_t ctx_cat[4][3] = {{0,6,10},{1,7,11},{2,8,12},{5,9,13}};
-    const uint32_t *qmul;
-    int i8x8, i4x4;
-    MpegEncContext * const s = &h->s;
-    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
-    if( IS_INTRA16x16( mb_type ) ) {
-        //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
-        AV_ZERO128(h->mb_luma_dc[p]+0);
-        AV_ZERO128(h->mb_luma_dc[p]+8);
-        AV_ZERO128(h->mb_luma_dc[p]+16);
-        AV_ZERO128(h->mb_luma_dc[p]+24);
-        decode_cabac_residual_dc(h, h->mb_luma_dc[p], ctx_cat[0][p], LUMA_DC_BLOCK_INDEX+p, scan, 16);
-
-        if( cbp&15 ) {
-            qmul = h->dequant4_coeff[p][qscale];
-            for( i4x4 = 0; i4x4 < 16; i4x4++ ) {
-                const int index = 16*p + i4x4;
-                //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", index );
-                decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[1][p], index, scan + 1, qmul, 15);
-            }
-        } else {
-            fill_rectangle(&h->non_zero_count_cache[scan8[16*p]], 4, 4, 8, 0, 1);
-        }
-    } else {
-        int cqm = (IS_INTRA( mb_type ) ? 0:3) + p;
-        for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
-            if( cbp & (1<<i8x8) ) {
-                if( IS_8x8DCT(mb_type) ) {
-                    const int index = 16*p + 4*i8x8;
-                    decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[3][p], index,
-                                                scan8x8, h->dequant8_coeff[cqm][qscale], 64);
-                } else {
-                    qmul = h->dequant4_coeff[cqm][qscale];
-                    for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
-                        const int index = 16*p + 4*i8x8 + i4x4;
-                        //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
-//START_TIMER
-                        decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[2][p], index, scan, qmul, 16);
-//STOP_TIMER("decode_residual")
-                    }
-                }
-            } else {
-                uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+16*p] ];
-                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
-            }
-        }
-    }
-}
-
 /**
  * decodes a macroblock
  * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
@@ -1834,7 +1204,6 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
     int mb_xy;
     int mb_type, partition_count, cbp = 0;
     int dct8x8_allowed= h->pps.transform_8x8_mode;
-    int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -1944,8 +1313,7 @@ decode_intra_mb:
     h->slice_table[ mb_xy ]= h->slice_num;
 
     if(IS_INTRA_PCM(mb_type)) {
-        static const uint16_t mb_sizes[4] = {256,384,512,768};
-        const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
+        const int mb_size = (384*h->sps.bit_depth_luma) >> 3;
         const uint8_t *ptr;
 
         // We assume these blocks are very rare so we do not optimize it.
@@ -1958,17 +1326,20 @@ decode_intra_mb:
         }
 
         // The pixels are stored in the same order as levels in h->mb array.
-        memcpy(h->mb, ptr, mb_size); ptr+=mb_size;
+        memcpy(h->mb, ptr, 2*mb_size/3); ptr+=2*mb_size/3;
+        if(CHROMA){
+            memcpy(h->mb+mb_size/3, ptr, mb_size/3); ptr+=mb_size/3;
+        }
 
         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
 
         // All blocks are present
-        h->cbp_table[mb_xy] = 0xf7ef;
+        h->cbp_table[mb_xy] = 0x1ef;
         h->chroma_pred_mode_table[mb_xy] = 0;
         // In deblocking, the quantizer is 0
         s->current_picture.qscale_table[mb_xy]= 0;
         // All coeffs are present
-        memset(h->non_zero_count[mb_xy], 16, 48);
+        memset(h->non_zero_count[mb_xy], 16, 32);
         s->current_picture.mb_type[mb_xy]= mb_type;
         h->last_qscale_diff = 0;
         return 0;
@@ -2005,7 +1376,7 @@ decode_intra_mb:
             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode( h, h->intra16x16_pred_mode );
             if( h->intra16x16_pred_mode < 0 ) return -1;
         }
-        if(decode_chroma){
+        if(CHROMA){
             h->chroma_pred_mode_table[mb_xy] =
             pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
 
@@ -2234,7 +1605,7 @@ decode_intra_mb:
 
     if( !IS_INTRA16x16( mb_type ) ) {
         cbp  = decode_cabac_mb_cbp_luma( h );
-        if(decode_chroma)
+        if(CHROMA)
             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
     }
 
@@ -2243,28 +1614,6 @@ decode_intra_mb:
     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
         mb_type |= MB_TYPE_8x8DCT * get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
     }
-
-    /* It would be better to do this in fill_decode_caches, but we don't know
-     * the transform mode of the current macroblock there. */
-    if (CHROMA444 && IS_8x8DCT(mb_type)){
-        int i;
-        for (i = 0; i < 2; i++){
-            if (h->left_type[i] && !IS_8x8DCT(h->left_type[i])){
-                h->non_zero_count_cache[3+8* 1 + 2*8*i]=
-                h->non_zero_count_cache[3+8* 2 + 2*8*i]=
-                h->non_zero_count_cache[3+8* 6 + 2*8*i]=
-                h->non_zero_count_cache[3+8* 7 + 2*8*i]=
-                h->non_zero_count_cache[3+8*11 + 2*8*i]=
-                h->non_zero_count_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
-            }
-        }
-        if (h->top_type && !IS_8x8DCT(h->top_type)){
-            uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
-            AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
-            AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
-            AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
-        }
-    }
     s->current_picture.mb_type[mb_xy]= mb_type;
 
     if( cbp || IS_INTRA16x16( mb_type ) ) {
@@ -2309,38 +1658,76 @@ decode_intra_mb:
         }else
             h->last_qscale_diff=0;
 
-        decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 0);
-        if(CHROMA444){
-            decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
-            decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2);
-        } else {
-            if( cbp&0x30 ){
-                int c;
-                for( c = 0; c < 2; c++ ) {
-                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
-                    decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
-                }
-            }
+        if( IS_INTRA16x16( mb_type ) ) {
+            int i;
+            //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
+            AV_ZERO128(h->mb_luma_dc+0);
+            AV_ZERO128(h->mb_luma_dc+8);
+            AV_ZERO128(h->mb_luma_dc+16);
+            AV_ZERO128(h->mb_luma_dc+24);
+            decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
 
-            if( cbp&0x20 ) {
-                int c, i;
-                for( c = 0; c < 2; c++ ) {
-                    qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
-                    for( i = 0; i < 4; i++ ) {
-                        const int index = 16 + 16 * c + i;
-                        //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
-                        decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
-                    }
+            if( cbp&15 ) {
+                qmul = h->dequant4_coeff[0][s->qscale];
+                for( i = 0; i < 16; i++ ) {
+                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
+                    decode_cabac_residual_nondc(h, h->mb + (16*i << pixel_shift), 1, i, scan + 1, qmul, 15);
                 }
             } else {
-                fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
-                fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
+            }
+        } else {
+            int i8x8, i4x4;
+            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
+                if( cbp & (1<<i8x8) ) {
+                    if( IS_8x8DCT(mb_type) ) {
+                        decode_cabac_residual_nondc(h, h->mb + (64*i8x8 << pixel_shift), 5, 4*i8x8,
+                            scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
+                    } else {
+                        qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
+                        for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
+                            const int index = 4*i8x8 + i4x4;
+                            //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
+//START_TIMER
+                            decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 2, index, scan, qmul, 16);
+//STOP_TIMER("decode_residual")
+                        }
+                    }
+                } else {
+                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
+                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+                }
             }
         }
+
+        if( cbp&0x30 ){
+            int c;
+            for( c = 0; c < 2; c++ ) {
+                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
+                decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
+            }
+        }
+
+        if( cbp&0x20 ) {
+            int c, i;
+            for( c = 0; c < 2; c++ ) {
+                qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
+                for( i = 0; i < 4; i++ ) {
+                    const int index = 16 + 4 * c + i;
+                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
+                    decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
+                }
+            }
+        } else {
+            uint8_t * const nnz= &h->non_zero_count_cache[0];
+            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        }
     } else {
-        fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
-        fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
-        fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
+        uint8_t * const nnz= &h->non_zero_count_cache[0];
+        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
+        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
         h->last_qscale_diff = 0;
     }
 
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 497166b423..2e5ea54679 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -371,12 +371,12 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
 
     //FIXME put trailing_onex into the context
 
-    if(max_coeff <= 8){
+    if(n >= CHROMA_DC_BLOCK_INDEX){
         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
         total_coeff= coeff_token>>2;
     }else{
-        if(n >= LUMA_DC_BLOCK_INDEX){
-            total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
+        if(n == LUMA_DC_BLOCK_INDEX){
+            total_coeff= pred_non_zero_count(h, 0);
             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
             total_coeff= coeff_token>>2;
         }else{
@@ -482,8 +482,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
     if(total_coeff == max_coeff)
         zeros_left=0;
     else{
-        /* FIXME: we don't actually support 4:2:2 yet. */
-        if(max_coeff <= 8)
+        if(n >= CHROMA_DC_BLOCK_INDEX)
             zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
         else
             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
@@ -537,80 +536,12 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
     return 0;
 }
 
-static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
-    int i4x4, i8x8;
-    MpegEncContext * const s = &h->s;
-    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
-    if(IS_INTRA16x16(mb_type)){
-        AV_ZERO128(h->mb_luma_dc[p]+0);
-        AV_ZERO128(h->mb_luma_dc[p]+8);
-        AV_ZERO128(h->mb_luma_dc[p]+16);
-        AV_ZERO128(h->mb_luma_dc[p]+24);
-        if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
-            return -1; //FIXME continue if partitioned and other return -1 too
-        }
-
-        assert((cbp&15) == 0 || (cbp&15) == 15);
-
-        if(cbp&15){
-            for(i8x8=0; i8x8<4; i8x8++){
-                for(i4x4=0; i4x4<4; i4x4++){
-                    const int index= i4x4 + 4*i8x8 + p*16;
-                    if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
-                        index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
-                        return -1;
-                    }
-                }
-            }
-            return 0xf;
-        }else{
-            fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
-            return 0;
-        }
-    }else{
-        int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
-        /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
-        int new_cbp = 0;
-        for(i8x8=0; i8x8<4; i8x8++){
-            if(cbp & (1<<i8x8)){
-                if(IS_8x8DCT(mb_type)){
-                    DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
-                    uint8_t *nnz;
-                    for(i4x4=0; i4x4<4; i4x4++){
-                        const int index= i4x4 + 4*i8x8 + p*16;
-                        if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
-                                            h->dequant8_coeff[cqm][qscale], 16) < 0 )
-                            return -1;
-                    }
-                    nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
-                    nnz[0] += nnz[1] + nnz[8] + nnz[9];
-                    new_cbp |= !!nnz[0] << i8x8;
-                }else{
-                    for(i4x4=0; i4x4<4; i4x4++){
-                        const int index= i4x4 + 4*i8x8 + p*16;
-                        if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
-                                            scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
-                            return -1;
-                        }
-                        new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
-                    }
-                }
-            }else{
-                uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
-                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
-            }
-        }
-        return new_cbp;
-    }
-}
-
 int ff_h264_decode_mb_cavlc(H264Context *h){
     MpegEncContext * const s = &h->s;
     int mb_xy;
     int partition_count;
     unsigned int mb_type, cbp;
     int dct8x8_allowed= h->pps.transform_8x8_mode;
-    int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -677,21 +608,19 @@ decode_intra_mb:
 
     if(IS_INTRA_PCM(mb_type)){
         unsigned int x;
-        static const uint16_t mb_sizes[4] = {256,384,512,768};
-        const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
 
         // We assume these blocks are very rare so we do not optimize it.
         align_get_bits(&s->gb);
 
         // The pixels are stored in the same order as levels in h->mb array.
-        for(x=0; x < mb_size; x++){
+        for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
         }
 
         // In deblocking, the quantizer is 0
         s->current_picture.qscale_table[mb_xy]= 0;
         // All coeffs are present
-        memset(h->non_zero_count[mb_xy], 16, 48);
+        memset(h->non_zero_count[mb_xy], 16, 32);
 
         s->current_picture.mb_type[mb_xy]= mb_type;
         return 0;
@@ -739,7 +668,7 @@ decode_intra_mb:
             if(h->intra16x16_pred_mode < 0)
                 return -1;
         }
-        if(decode_chroma){
+        if(CHROMA){
             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
             if(pred_mode < 0)
                 return -1;
@@ -967,19 +896,15 @@ decode_intra_mb:
 
     if(!IS_INTRA16x16(mb_type)){
         cbp= get_ue_golomb(&s->gb);
+        if(cbp > 47){
+            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
+            return -1;
+        }
 
-        if(decode_chroma){
-            if(cbp > 47){
-                av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
-                return -1;
-            }
+        if(CHROMA){
             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
             else                     cbp= golomb_to_inter_cbp   [cbp];
         }else{
-            if(cbp > 15){
-                av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
-                return -1;
-            }
             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
             else                     cbp= golomb_to_inter_cbp_gray[cbp];
         }
@@ -993,9 +918,8 @@ decode_intra_mb:
     s->current_picture.mb_type[mb_xy]= mb_type;
 
     if(cbp || IS_INTRA16x16(mb_type)){
-        int i4x4, chroma_idx;
+        int i8x8, i4x4, chroma_idx;
         int dquant;
-        int ret;
         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
         const uint8_t *scan, *scan8x8;
         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
@@ -1023,45 +947,85 @@ decode_intra_mb:
 
         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
-
-        if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
-            return -1;
-        }
-        h->cbp_table[mb_xy] |= ret << 12;
-        if(CHROMA444){
-            if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
-                return -1;
-            }
-            if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
-                return -1;
-            }
-        } else {
-            if(cbp&0x30){
-                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                    if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
-                        return -1;
-                    }
+        if(IS_INTRA16x16(mb_type)){
+            AV_ZERO128(h->mb_luma_dc+0);
+            AV_ZERO128(h->mb_luma_dc+8);
+            AV_ZERO128(h->mb_luma_dc+16);
+            AV_ZERO128(h->mb_luma_dc+24);
+            if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
+                return -1; //FIXME continue if partitioned and other return -1 too
             }
 
-            if(cbp&0x20){
-                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
-                    const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
+            assert((cbp&15) == 0 || (cbp&15) == 15);
+
+            if(cbp&15){
+                for(i8x8=0; i8x8<4; i8x8++){
                     for(i4x4=0; i4x4<4; i4x4++){
-                        const int index= 16 + 16*chroma_idx + i4x4;
-                        if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
+                        const int index= i4x4 + 4*i8x8;
+                        if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
                             return -1;
                         }
                     }
                 }
             }else{
-                fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
-                fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
+            }
+        }else{
+            for(i8x8=0; i8x8<4; i8x8++){
+                if(cbp & (1<<i8x8)){
+                    if(IS_8x8DCT(mb_type)){
+                        DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift];
+                        uint8_t *nnz;
+                        for(i4x4=0; i4x4<4; i4x4++){
+                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
+                                                h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
+                                return -1;
+                        }
+                        nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
+                        nnz[0] += nnz[1] + nnz[8] + nnz[9];
+                    }else{
+                        for(i4x4=0; i4x4<4; i4x4++){
+                            const int index= i4x4 + 4*i8x8;
+
+                            if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
+                                return -1;
+                            }
+                        }
+                    }
+                }else{
+                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
+                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+                }
             }
         }
+
+        if(cbp&0x30){
+            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
+                if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
+                    return -1;
+                }
+        }
+
+        if(cbp&0x20){
+            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
+                const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
+                for(i4x4=0; i4x4<4; i4x4++){
+                    const int index= 16 + 4*chroma_idx + i4x4;
+                    if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
+                        return -1;
+                    }
+                }
+            }
+        }else{
+            uint8_t * const nnz= &h->non_zero_count_cache[0];
+            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        }
     }else{
-        fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
-        fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
-        fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
+        uint8_t * const nnz= &h->non_zero_count_cache[0];
+        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
+        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
     }
     s->current_picture.qscale_table[mb_xy]= s->qscale;
     write_back_non_zero_count(h);
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 1ae534ec96..72b1905936 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -217,11 +217,10 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
     int mb_xy;
     int mb_type, left_type;
     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
-    int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
 
     mb_xy = h->mb_xy;
 
-    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff || CHROMA444) {
+    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
         ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
         return;
     }
@@ -263,18 +262,16 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
             filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
         }
-        if(chroma){
-            if(left_type){
-                filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
-                filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
-            }
-            filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
-            filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
-            filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
-            filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
-            filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
-            filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
+        if(left_type){
+            filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
+            filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
         }
+        filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
+        filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
+        filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+        filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
+        filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+        filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
         return;
     } else {
         LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]);
@@ -301,7 +298,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
 #define FILTER(hv,dir,edge)\
         if(AV_RN64A(bS[dir][edge])) {                                   \
             filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\
-            if(chroma && !(edge&1)) {\
+            if(!(edge&1)) {\
                 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
                 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
             }\
@@ -356,10 +353,9 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
     return v;
 }
 
-static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma, int chroma444, int dir) {
+static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
     MpegEncContext * const s = &h->s;
     int edge;
-    int chroma_qp_avg[2];
     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
     const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type;
 
@@ -398,7 +394,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                         bS[2]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+2]);
                         bS[3]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+3]);
                     }else{
-                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 3*4;
+                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8;
                     int i;
                     for( i = 0; i < 4; i++ ) {
                         bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]);
@@ -411,17 +407,10 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
                 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
                 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h );
-                chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
-                chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
-                if (chroma) {
-                    if (chroma444) {
-                        filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
-                        filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
-                    } else {
-                        filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
-                        filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
-                    }
-                }
+                filter_mb_edgech( &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
+                                ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h);
+                filter_mb_edgech( &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
+                                ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h);
             }
         }else{
             DECLARE_ALIGNED(8, int16_t, bS)[4];
@@ -476,29 +465,23 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]);
                 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
                 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
-                chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
-                chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
                 if( dir == 0 ) {
                     filter_mb_edgev( &img_y[0], linesize, bS, qp, h );
-                    if (chroma) {
-                        if (chroma444) {
-                            filter_mb_edgev ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
-                            filter_mb_edgev ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
-                        } else {
-                            filter_mb_edgecv( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
-                            filter_mb_edgecv( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
-                        }
+                    {
+                        int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
+                        filter_mb_edgecv( &img_cb[0], uvlinesize, bS, qp, h);
+                        if(h->pps.chroma_qp_diff)
+                            qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
+                        filter_mb_edgecv( &img_cr[0], uvlinesize, bS, qp, h);
                     }
                 } else {
                     filter_mb_edgeh( &img_y[0], linesize, bS, qp, h );
-                    if (chroma) {
-                        if (chroma444) {
-                            filter_mb_edgeh ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
-                            filter_mb_edgeh ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
-                        } else {
-                            filter_mb_edgech( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
-                            filter_mb_edgech( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
-                        }
+                    {
+                        int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
+                        filter_mb_edgech( &img_cb[0], uvlinesize, bS, qp, h);
+                        if(h->pps.chroma_qp_diff)
+                            qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
+                        filter_mb_edgech( &img_cr[0], uvlinesize, bS, qp, h);
                     }
                 }
             }
@@ -562,25 +545,15 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
         //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
         if( dir == 0 ) {
             filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
-            if (chroma) {
-                if (chroma444) {
-                    filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
-                    filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
-                } else if( (edge&1) == 0 ) {
-                    filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
-                    filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
-                }
+            if( (edge&1) == 0 ) {
+                filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
+                filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
             }
         } else {
             filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
-            if (chroma) {
-                if (chroma444) {
-                    filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
-                    filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
-                } else if( (edge&1) == 0 ) {
-                    filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
-                    filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
-                }
+            if( (edge&1) == 0 ) {
+                filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
+                filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
             }
         }
     }
@@ -593,7 +566,6 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
     int first_vertical_edge_done = 0;
     av_unused int dir;
-    int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
 
     if (FRAME_MBAFF
             // and current and left pair do not have the same interlaced type
@@ -617,11 +589,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
         } else {
             static const uint8_t offset[2][2][8]={
                 {
-                    {3+4*0, 3+4*0, 3+4*0, 3+4*0, 3+4*1, 3+4*1, 3+4*1, 3+4*1},
-                    {3+4*2, 3+4*2, 3+4*2, 3+4*2, 3+4*3, 3+4*3, 3+4*3, 3+4*3},
+                    {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1},
+                    {7+8*2, 7+8*2, 7+8*2, 7+8*2, 7+8*3, 7+8*3, 7+8*3, 7+8*3},
                 },{
-                    {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
-                    {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
+                    {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3},
+                    {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3},
                 }
             };
             const uint8_t *off= offset[MB_FIELD][mb_y&1];
@@ -662,29 +634,25 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
         if(MB_FIELD){
             filter_mb_mbaff_edgev ( h, img_y                ,   linesize, bS  , 1, qp [0] );
             filter_mb_mbaff_edgev ( h, img_y  + 8*  linesize,   linesize, bS+4, 1, qp [1] );
-            if (chroma){
-                filter_mb_mbaff_edgecv( h, img_cb,                uvlinesize, bS  , 1, bqp[0] );
-                filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
-                filter_mb_mbaff_edgecv( h, img_cr,                uvlinesize, bS  , 1, rqp[0] );
-                filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
-            }
+            filter_mb_mbaff_edgecv( h, img_cb,                uvlinesize, bS  , 1, bqp[0] );
+            filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
+            filter_mb_mbaff_edgecv( h, img_cr,                uvlinesize, bS  , 1, rqp[0] );
+            filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
         }else{
             filter_mb_mbaff_edgev ( h, img_y              , 2*  linesize, bS  , 2, qp [0] );
             filter_mb_mbaff_edgev ( h, img_y  +   linesize, 2*  linesize, bS+1, 2, qp [1] );
-            if (chroma){
-                filter_mb_mbaff_edgecv( h, img_cb,              2*uvlinesize, bS  , 2, bqp[0] );
-                filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
-                filter_mb_mbaff_edgecv( h, img_cr,              2*uvlinesize, bS  , 2, rqp[0] );
-                filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
-            }
+            filter_mb_mbaff_edgecv( h, img_cb,              2*uvlinesize, bS  , 2, bqp[0] );
+            filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
+            filter_mb_mbaff_edgecv( h, img_cr,              2*uvlinesize, bS  , 2, rqp[0] );
+            filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
         }
     }
 
 #if CONFIG_SMALL
     for( dir = 0; dir < 2; dir++ )
-        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, chroma, CHROMA444, dir);
+        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
 #else
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, chroma, CHROMA444, 0);
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, chroma, CHROMA444, 1);
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
 #endif
 }
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index 9c41e4ca73..a98f14aaf6 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -269,7 +269,7 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_s
         fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
         fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
         fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
-        fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1]
+        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
     };
     if(get_bits1(&s->gb)){
         sps->scaling_matrix_present |= is_sps;
@@ -281,15 +281,7 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_s
         decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
         if(is_sps || pps->transform_8x8_mode){
             decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
-            if(h->sps.chroma_format_idc == 3){
-                decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[0],scaling_matrix8[0]);  // Intra, Cr
-                decode_scaling_list(h,scaling_matrix8[2],64,default_scaling8[0],scaling_matrix8[1]);  // Intra, Cb
-            }
-            decode_scaling_list(h,scaling_matrix8[3],64,default_scaling8[1],fallback[3]);  // Inter, Y
-            if(h->sps.chroma_format_idc == 3){
-                decode_scaling_list(h,scaling_matrix8[4],64,default_scaling8[1],scaling_matrix8[3]);  // Inter, Cr
-                decode_scaling_list(h,scaling_matrix8[5],64,default_scaling8[1],scaling_matrix8[4]);  // Inter, Cb
-            }
+            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
         }
     }
 }
@@ -403,7 +395,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
         if(sps->crop_left || sps->crop_top){
             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
         }
-        if(sps->crop_right >= (8<<CHROMA444) || sps->crop_bottom >= (8<<CHROMA444)){
+        if(sps->crop_right >= 8 || sps->crop_bottom >= 8){
             av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
         }
     }else{
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 6972725781..864c118bb5 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -66,10 +66,10 @@ typedef struct H264DSPContext{
     void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
     void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
 
-    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
-    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
-    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
-    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
+    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
     void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul);
     void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
 }H264DSPContext;
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index e7f9af7fb0..39c9a1c9eb 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -30,19 +30,15 @@
 #ifndef AVCODEC_H264IDCT_INTERNAL_H
 #define AVCODEC_H264IDCT_INTERNAL_H
 //FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
-static const uint8_t scan8[16*3]={
- 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
- 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
- 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
- 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
- 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
- 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
- 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
- 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
- 4+11*8, 5+11*8, 4+12*8, 5+12*8,
- 6+11*8, 7+11*8, 6+12*8, 7+12*8,
- 4+13*8, 5+13*8, 4+14*8, 5+14*8,
- 6+13*8, 7+13*8, 6+14*8, 7+14*8
+static const uint8_t scan8[16 + 2*4]={
+ 4+1*8, 5+1*8, 4+2*8, 5+2*8,
+ 6+1*8, 7+1*8, 6+2*8, 7+2*8,
+ 4+3*8, 5+3*8, 4+4*8, 5+4*8,
+ 6+3*8, 7+3*8, 6+4*8, 7+4*8,
+ 1+1*8, 2+1*8,
+ 1+2*8, 2+2*8,
+ 1+4*8, 2+4*8,
+ 1+5*8, 2+5*8,
 };
 #endif
 
@@ -194,7 +190,7 @@ void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
     }
 }
 
-void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
+void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
     int i;
     for(i=0; i<16; i++){
         int nnz = nnzc[ scan8[i] ];
@@ -205,7 +201,7 @@ void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *b
     }
 }
 
-void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
+void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
     int i;
     for(i=0; i<16; i++){
         if(nnzc[ scan8[i] ])             FUNCC(idct_internal      )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1);
@@ -213,7 +209,7 @@ void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTEL
     }
 }
 
-void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
+void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
     int i;
     for(i=0; i<16; i+=4){
         int nnz = nnzc[ scan8[i] ];
@@ -224,15 +220,13 @@ void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *b
     }
 }
 
-void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
-    int i, j;
-    for(j=1; j<3; j++){
-        for(i=j*16; i<j*16+4; i++){
-            if(nnzc[ scan8[i] ])
-                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
-            else if(((dctcoef*)block)[i*16])
-                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
-        }
+void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    int i;
+    for(i=16; i<16+8; i++){
+        if(nnzc[ scan8[i] ])
+            FUNCC(ff_h264_idct_add   )(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
+        else if(((dctcoef*)block)[i*16])
+            FUNCC(ff_h264_idct_dc_add)(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
     }
 }
 /**
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 4978d28b49..6a45da8761 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1185,17 +1185,15 @@ void MPV_frame_end(MpegEncContext *s)
        && s->current_picture.reference
        && !s->intra_only
        && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
-            int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w;
-            int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h;
             s->dsp.draw_edges(s->current_picture.data[0], s->linesize  ,
-                              s->h_edge_pos             , s->v_edge_pos,
-                              EDGE_WIDTH        , EDGE_WIDTH        , EDGE_TOP | EDGE_BOTTOM);
+                              s->h_edge_pos   , s->v_edge_pos   ,
+                              EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
             s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize,
-                              s->h_edge_pos>>hshift, s->v_edge_pos>>vshift,
-                              EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, EDGE_TOP | EDGE_BOTTOM);
+                              s->h_edge_pos>>1, s->v_edge_pos>>1,
+                              EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
             s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize,
-                              s->h_edge_pos>>hshift, s->v_edge_pos>>vshift,
-                              EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, EDGE_TOP | EDGE_BOTTOM);
+                              s->h_edge_pos>>1, s->v_edge_pos>>1,
+                              EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
     }
 
     emms_c();
@@ -2286,19 +2284,14 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
        && !s->intra_only
        && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
         int sides = 0, edge_h;
-        int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w;
-        int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h;
         if (y==0) sides |= EDGE_TOP;
         if (y + h >= s->v_edge_pos) sides |= EDGE_BOTTOM;
 
         edge_h= FFMIN(h, s->v_edge_pos - y);
 
-        s->dsp.draw_edges(s->current_picture_ptr->data[0] +  y         *s->linesize  , s->linesize,
-                          s->h_edge_pos        , edge_h        , EDGE_WIDTH        , EDGE_WIDTH        , sides);
-        s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>vshift)*s->uvlinesize, s->uvlinesize,
-                          s->h_edge_pos>>hshift, edge_h>>hshift, EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, sides);
-        s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>vshift)*s->uvlinesize, s->uvlinesize,
-                          s->h_edge_pos>>hshift, edge_h>>hshift, EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[0] +  y    *s->linesize  , s->linesize  , s->h_edge_pos   , edge_h   , EDGE_WIDTH  , sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides);
     }
 
     h= FFMIN(h, s->avctx->height - y);
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 28f04f119b..6db0b290ba 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -1978,13 +1978,13 @@ static int frame_start(SnowContext *s){
     if(s->current_picture.data[0]){
         s->dsp.draw_edges(s->current_picture.data[0],
                           s->current_picture.linesize[0], w   , h   ,
-                          EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
+                          EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
         s->dsp.draw_edges(s->current_picture.data[1],
                           s->current_picture.linesize[1], w>>1, h>>1,
-                          EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                          EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
         s->dsp.draw_edges(s->current_picture.data[2],
                           s->current_picture.linesize[2], w>>1, h>>1,
-                          EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                          EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
     }
 
     release_buffer(s->avctx);
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 23ab209312..7cde5e5552 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -633,9 +633,8 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
         memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy], DC_PRED, 8);
     }
     if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
-        memset(h->non_zero_count_cache + 8, 0, 14*8*sizeof(uint8_t));
-        s->dsp.clear_blocks(h->mb+  0);
-        s->dsp.clear_blocks(h->mb+384);
+        memset(h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t));
+        s->dsp.clear_blocks(h->mb);
     }
 
     if (!IS_INTRA16x16(mb_type) && (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
@@ -655,8 +654,8 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
         }
     }
     if (IS_INTRA16x16(mb_type)) {
-        AV_ZERO128(h->mb_luma_dc[0]+0);
-        AV_ZERO128(h->mb_luma_dc[0]+8);
+        AV_ZERO128(h->mb_luma_dc+0);
+        AV_ZERO128(h->mb_luma_dc+8);
         if (svq3_decode_block(&s->gb, h->mb_luma_dc, 0, 1)){
             av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding intra luma dc\n");
             return -1;
@@ -682,23 +681,20 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
         }
 
         if ((cbp & 0x30)) {
-            for (i = 1; i < 3; ++i) {
-              if (svq3_decode_block(&s->gb, &h->mb[16*16*i], 0, 3)){
+            for (i = 0; i < 2; ++i) {
+              if (svq3_decode_block(&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)){
                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma dc block\n");
                 return -1;
               }
             }
 
             if ((cbp & 0x20)) {
-                for (i = 1; i < 3; i++) {
-                    for (j = 0; j < 4; j++) {
-                        k = 16*i + j;
-                        h->non_zero_count_cache[ scan8[k] ] = 1;
+                for (i = 0; i < 8; i++) {
+                    h->non_zero_count_cache[ scan8[16+i] ] = 1;
 
-                        if (svq3_decode_block(&s->gb, &h->mb[16*k], 1, 1)){
-                            av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
-                            return -1;
-                        }
+                    if (svq3_decode_block(&s->gb, &h->mb[16*(16 + i)], 1, 1)){
+                        av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
+                        return -1;
                     }
                 }
             }
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 214c6a3945..1cc6991666 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -784,7 +784,7 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
 
 /* draw the edges of width 'w' of an image of size width, height
    this mmx version can only handle w==8 || w==16 */
-static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides)
+static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int sides)
 {
     uint8_t *ptr, *last_line;
     int i;
@@ -839,7 +839,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w,
 
     /* top and bottom (and hopefully also the corners) */
     if (sides&EDGE_TOP) {
-        for(i = 0; i < h; i += 4) {
+        for(i = 0; i < w; i += 4) {
             ptr= buf - (i + 1) * wrap - w;
             __asm__ volatile(
                     "1:                             \n\t"
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index b5f77c90d5..c850dc2ef3 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -36,7 +36,7 @@
 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
 static int decode_significance_x86(CABACContext *c, int max_coeff,
                                    uint8_t *significant_coeff_ctx_base,
-                                   int *index, int last_off){
+                                   int *index){
     void *end= significant_coeff_ctx_base + max_coeff - 1;
     int minusstart= -(int)significant_coeff_ctx_base;
     int minusindex= 4-(int)index;
@@ -52,12 +52,10 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
         "test $1, %%edx                         \n\t"
         " jz 3f                                 \n\t"
-        "add  %7, %1                            \n\t"
 
-        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx",
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx",
                              "%%bx", "%%esi", "%%eax", "%%al")
 
-        "sub  %7, %1                            \n\t"
         "mov  %2, %%"REG_a"                     \n\t"
         "movl %4, %%ecx                         \n\t"
         "add  %1, %%"REG_c"                     \n\t"
@@ -84,7 +82,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
         "movl %%esi, "RANGE    "(%3)            \n\t"
         "movl %%ebx, "LOW      "(%3)            \n\t"
         :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)
-        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off)
+        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)
         : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
     );
     return coeff_count;
@@ -92,7 +90,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
 static int decode_significance_8x8_x86(CABACContext *c,
                                        uint8_t *significant_coeff_ctx_base,
-                                       int *index, int last_off, const uint8_t *sig_off){
+                                       int *index, const uint8_t *sig_off){
     int minusindex= 4-(int)index;
     int coeff_count;
     x86_reg last=0;
@@ -116,9 +114,8 @@ static int decode_significance_8x8_x86(CABACContext *c,
 
         "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
         "add %5, %%"REG_D"                      \n\t"
-        "add %7, %%"REG_D"                      \n\t"
 
-        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx",
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx",
                              "%%bx", "%%esi", "%%eax", "%%al")
 
         "mov %2, %%"REG_a"                      \n\t"
@@ -145,7 +142,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "movl %%esi, "RANGE    "(%3)            \n\t"
         "movl %%ebx, "LOW      "(%3)            \n\t"
         :"=&a"(coeff_count),"+m"(last), "+m"(index)
-        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off)
+        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)
         : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"
     );
     return coeff_count;
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 4788da98e0..f90f41c4bc 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -32,18 +32,14 @@
 SECTION_RODATA
 
 ; FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
-scan8_mem: db  4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
-           db  6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
-           db  4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
-           db  6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
-           db  4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
-           db  6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
-           db  4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
-           db  6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
-           db  4+11*8, 5+11*8, 4+12*8, 5+12*8
-           db  6+11*8, 7+11*8, 6+12*8, 7+12*8
-           db  4+13*8, 5+13*8, 4+14*8, 5+14*8
-           db  6+13*8, 7+13*8, 6+14*8, 7+14*8
+scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
+           db 6+1*8, 7+1*8, 6+2*8, 7+2*8
+           db 4+3*8, 5+3*8, 4+4*8, 5+4*8
+           db 6+3*8, 7+3*8, 6+4*8, 7+4*8
+           db 1+1*8, 2+1*8
+           db 1+2*8, 2+2*8
+           db 1+4*8, 2+4*8
+           db 1+5*8, 2+5*8
 %ifdef PIC
 %define scan8 r11
 %else
@@ -621,8 +617,6 @@ cglobal h264_idct_add8_8_mmx, 5, 7, 0
     mov         r10, r0
 %endif
     call         h264_idct_add8_mmx_plane
-    mov          r5, 32
-    add          r2, 384
 %ifdef ARCH_X86_64
     add         r10, gprsize
 %else
@@ -684,8 +678,6 @@ cglobal h264_idct_add8_8_mmx2, 5, 7, 0
     lea         r11, [scan8_mem]
 %endif
     call h264_idct_add8_mmx2_plane
-    mov          r5, 32
-    add          r2, 384
 %ifdef ARCH_X86_64
     add         r10, gprsize
 %else
@@ -818,12 +810,12 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
     test        r0, r0
     jz .try%1dc
 %ifdef ARCH_X86_64
-    mov        r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
+    mov        r0d, dword [r1+%1*8+64]
     add         r0, [r10]
 %else
     mov         r0, r0m
     mov         r0, [r0]
-    add         r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
+    add         r0, dword [r1+%1*8+64]
 %endif
     call        x264_add8x4_idct_sse2
     jmp .cycle%1end
@@ -832,18 +824,16 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
     or         r0w, word [r2+32]
     jz .cycle%1end
 %ifdef ARCH_X86_64
-    mov        r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
+    mov        r0d, dword [r1+%1*8+64]
     add         r0, [r10]
 %else
     mov         r0, r0m
     mov         r0, [r0]
-    add         r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
+    add         r0, dword [r1+%1*8+64]
 %endif
     call        h264_idct_dc_add8_mmx2
 .cycle%1end
-%if %1 == 1
-    add         r2, 384+64
-%elif %1 < 3
+%if %1 < 3
     add         r2, 64
 %endif
 %endmacro
@@ -855,15 +845,15 @@ cglobal h264_idct_add8_8_sse2, 5, 7, 8
 %ifdef ARCH_X86_64
     mov         r10, r0
 %endif
-    add8_sse2_cycle 0, 0x34
-    add8_sse2_cycle 1, 0x3c
+    add8_sse2_cycle 0, 0x09
+    add8_sse2_cycle 1, 0x11
 %ifdef ARCH_X86_64
     add         r10, gprsize
 %else
     add        r0mp, gprsize
 %endif
-    add8_sse2_cycle 2, 0x5c
-    add8_sse2_cycle 3, 0x64
+    add8_sse2_cycle 2, 0x21
+    add8_sse2_cycle 3, 0x29
     RET
 
 ;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul)
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 54636a95d0..3f7cf4cefc 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -29,18 +29,14 @@ SECTION_RODATA
 
 pw_pixel_max: times 8 dw ((1 << 10)-1)
 pd_32:        times 4 dd 32
-scan8_mem: db  4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
-           db  6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
-           db  4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
-           db  6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
-           db  4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
-           db  6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
-           db  4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
-           db  6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
-           db  4+11*8, 5+11*8, 4+12*8, 5+12*8
-           db  6+11*8, 7+11*8, 6+12*8, 7+12*8
-           db  4+13*8, 5+13*8, 4+14*8, 5+14*8
-           db  6+13*8, 7+13*8, 6+14*8, 7+14*8
+scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
+           db 6+1*8, 7+1*8, 6+2*8, 7+2*8
+           db 4+3*8, 5+3*8, 4+4*8, 5+4*8
+           db 6+3*8, 7+3*8, 6+4*8, 7+4*8
+           db 1+1*8, 2+1*8
+           db 1+2*8, 2+2*8
+           db 1+4*8, 2+4*8
+           db 1+5*8, 2+5*8
 
 %ifdef PIC
 %define scan8 r11
@@ -310,7 +306,7 @@ INIT_AVX
 IDCT_ADD16INTRA_10 avx
 %endif
 
-%assign last_block 36
+%assign last_block 24
 ;-----------------------------------------------------------------------------
 ; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
@@ -321,22 +317,21 @@ cglobal h264_idct_add8_10_%1,5,7
 %endif
     add      r2, 1024
     mov      r0, [r0]
-    ADD16_OP_INTRA %1, 16, 4+ 6*8
-    ADD16_OP_INTRA %1, 18, 4+ 7*8
-    add      r2, 1024-128*2
+    ADD16_OP_INTRA %1, 16, 1+1*8
+    ADD16_OP_INTRA %1, 18, 1+2*8
 %ifdef ARCH_X86_64
     mov      r0, [r10+gprsize]
 %else
     mov      r0, r0m
     mov      r0, [r0+gprsize]
 %endif
-    ADD16_OP_INTRA %1, 32, 4+11*8
-    ADD16_OP_INTRA %1, 34, 4+12*8
+    ADD16_OP_INTRA %1, 20, 1+4*8
+    ADD16_OP_INTRA %1, 22, 1+5*8
     REP_RET
     AC %1, 16
     AC %1, 18
-    AC %1, 32
-    AC %1, 34
+    AC %1, 20
+    AC %1, 22
 
 %endmacro ; IDCT_ADD8
 

From 36151b3e3112cd7d8ae0e02e850dee16bd966696 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Fri, 10 Jun 2011 11:45:03 -0400
Subject: [PATCH 791/830] ac3enc: use function pointer to choose between AC-3
 and E-AC-3 header output functions.

---
 libavcodec/ac3enc.c | 11 +++++++----
 libavcodec/ac3enc.h |  2 ++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index e8ccde514a..9403bf6443 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -1987,10 +1987,7 @@ static void output_frame(AC3EncodeContext *s, unsigned char *frame)
 
     init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE);
 
-    if (CONFIG_EAC3_ENCODER && s->eac3)
-        ff_eac3_output_frame_header(s);
-    else
-        ac3_output_frame_header(s);
+    s->output_frame_header(s);
 
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
         output_audio_block(s, blk);
@@ -2732,6 +2729,12 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
         s->crc_inv[1] = pow_poly((CRC16_POLY >> 1), (8 * frame_size_58) - 16, CRC16_POLY);
     }
 
+    /* set function pointers */
+    if (CONFIG_EAC3_ENCODER && s->eac3)
+        s->output_frame_header = ff_eac3_output_frame_header;
+    else
+        s->output_frame_header = ac3_output_frame_header;
+
     set_bandwidth(s);
 
     exponent_init(s);
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 0541683537..34ca2e449f 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -209,6 +209,8 @@ typedef struct AC3EncodeContext {
     int ref_bap_set;                                         ///< indicates if ref_bap pointers have been set
 
     DECLARE_ALIGNED(32, SampleType, windowed_samples)[AC3_WINDOW_SIZE];
+
+    void (*output_frame_header)(struct AC3EncodeContext *s);
 } AC3EncodeContext;
 
 #endif /* AVCODEC_AC3ENC_H */

From e754dfc0bba4f81fe797f240fca94fea5dfd925e Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Fri, 10 Jun 2011 12:42:36 -0400
Subject: [PATCH 792/830] ac3enc: dynamically allocate AC3EncodeContext fields
 windowed_samples and mdct

This will allow the same struct to be used for both the fixed-point and
floating-point AC-3 encoders.
---
 libavcodec/ac3enc.c | 15 ++++++++++-----
 libavcodec/ac3enc.h |  5 ++---
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 9403bf6443..e71afe62ee 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -273,12 +273,12 @@ static void apply_mdct(AC3EncodeContext *s)
             AC3Block *block = &s->blocks[blk];
             const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE];
 
-            apply_window(&s->dsp, s->windowed_samples, input_samples, s->mdct.window, AC3_WINDOW_SIZE);
+            apply_window(&s->dsp, s->windowed_samples, input_samples, s->mdct->window, AC3_WINDOW_SIZE);
 
             block->coeff_shift[ch+1] = normalize_samples(s);
 
-            s->mdct.fft.mdct_calcw(&s->mdct.fft, block->mdct_coef[ch+1],
-                                   s->windowed_samples);
+            s->mdct->fft.mdct_calcw(&s->mdct->fft, block->mdct_coef[ch+1],
+                                    s->windowed_samples);
         }
     }
 }
@@ -2318,6 +2318,7 @@ static av_cold int ac3_encode_close(AVCodecContext *avctx)
     int blk, ch;
     AC3EncodeContext *s = avctx->priv_data;
 
+    av_freep(&s->windowed_samples);
     for (ch = 0; ch < s->channels; ch++)
         av_freep(&s->planar_samples[ch]);
     av_freep(&s->planar_samples);
@@ -2343,7 +2344,8 @@ static av_cold int ac3_encode_close(AVCodecContext *avctx)
         av_freep(&block->qmant);
     }
 
-    mdct_end(&s->mdct);
+    mdct_end(s->mdct);
+    av_freep(&s->mdct);
 
     av_freep(&avctx->coded_frame);
     return 0;
@@ -2598,6 +2600,8 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
     AC3EncodeContext *s = avctx->priv_data;
     int channels = s->channels + 1; /* includes coupling channel */
 
+    FF_ALLOC_OR_GOTO(avctx, s->windowed_samples, AC3_WINDOW_SIZE *
+                     sizeof(*s->windowed_samples), alloc_fail);
     FF_ALLOC_OR_GOTO(avctx, s->planar_samples, s->channels * sizeof(*s->planar_samples),
                      alloc_fail);
     for (ch = 0; ch < s->channels; ch++) {
@@ -2741,7 +2745,8 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
 
     bit_alloc_init(s);
 
-    ret = mdct_init(avctx, &s->mdct, 9);
+    FF_ALLOCZ_OR_GOTO(avctx, s->mdct, sizeof(AC3MDCTContext), init_fail);
+    ret = mdct_init(avctx, s->mdct, 9);
     if (ret)
         goto init_fail;
 
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 34ca2e449f..ccdb963a7c 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -128,7 +128,7 @@ typedef struct AC3EncodeContext {
     PutBitContext pb;                       ///< bitstream writer context
     DSPContext dsp;
     AC3DSPContext ac3dsp;                   ///< AC-3 optimized functions
-    AC3MDCTContext mdct;                    ///< MDCT context
+    AC3MDCTContext *mdct;                   ///< MDCT context
 
     AC3Block blocks[AC3_MAX_BLOCKS];        ///< per-block info
 
@@ -189,6 +189,7 @@ typedef struct AC3EncodeContext {
     int frame_bits;                         ///< all frame bits except exponents and mantissas
     int exponent_bits;                      ///< number of bits used for exponents
 
+    SampleType *windowed_samples;
     SampleType **planar_samples;
     uint8_t *bap_buffer;
     uint8_t *bap1_buffer;
@@ -208,8 +209,6 @@ typedef struct AC3EncodeContext {
     uint8_t *ref_bap     [AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< bit allocation pointers (bap)
     int ref_bap_set;                                         ///< indicates if ref_bap pointers have been set
 
-    DECLARE_ALIGNED(32, SampleType, windowed_samples)[AC3_WINDOW_SIZE];
-
     void (*output_frame_header)(struct AC3EncodeContext *s);
 } AC3EncodeContext;
 

From e0cc66df61664bb6f9271d9aae3c778e1f906b4c Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Fri, 10 Jun 2011 14:57:19 -0400
Subject: [PATCH 793/830] ac3enc: split templated float vs. fixed functions
 into a separate file.

The templated functions are now called through function pointers, so the
code shared by the fixed-point and floating-point encoders no longer has to
be duplicated for each of them.
---
 libavcodec/Makefile               |   9 +-
 libavcodec/ac3enc.c               | 438 +++---------------------------
 libavcodec/ac3enc.h               |  79 ++++++
 libavcodec/ac3enc_fixed.c         |  40 ++-
 libavcodec/ac3enc_float.c         |  52 ++--
 libavcodec/ac3enc_opts_template.c |   3 +
 libavcodec/ac3enc_template.c      | 377 +++++++++++++++++++++++++
 libavcodec/eac3enc.c              |  24 ++
 8 files changed, 577 insertions(+), 445 deletions(-)
 create mode 100644 libavcodec/ac3enc_template.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 581d6bf399..0cfa08c1c3 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -60,8 +60,9 @@ OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
                                           mpeg4audio.o kbdwin.o
 OBJS-$(CONFIG_AASC_DECODER)            += aasc.o msrledec.o
 OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3dec_data.o ac3.o kbdwin.o
-OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3tab.o ac3.o kbdwin.o
-OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3tab.o ac3.o
+OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3enc.o ac3tab.o \
+                                          ac3.o kbdwin.o
+OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3enc.o ac3tab.o ac3.o
 OBJS-$(CONFIG_ALAC_DECODER)            += alac.o
 OBJS-$(CONFIG_ALAC_ENCODER)            += alacenc.o
 OBJS-$(CONFIG_ALS_DECODER)             += alsdec.o bgmc.o mpeg4audio.o
@@ -124,8 +125,8 @@ OBJS-$(CONFIG_DVVIDEO_DECODER)         += dv.o dvdata.o
 OBJS-$(CONFIG_DVVIDEO_ENCODER)         += dv.o dvdata.o
 OBJS-$(CONFIG_DXA_DECODER)             += dxa.o
 OBJS-$(CONFIG_EAC3_DECODER)            += eac3dec.o eac3dec_data.o
-OBJS-$(CONFIG_EAC3_ENCODER)            += eac3enc.o ac3enc_float.o ac3tab.o \
-                                          ac3.o kbdwin.o
+OBJS-$(CONFIG_EAC3_ENCODER)            += eac3enc.o ac3enc.o ac3enc_float.o \
+                                          ac3tab.o ac3.o kbdwin.o
 OBJS-$(CONFIG_EACMV_DECODER)           += eacmv.o
 OBJS-$(CONFIG_EAMAD_DECODER)           += eamad.o eaidct.o mpeg12.o \
                                           mpeg12data.o mpegvideo.o  \
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index e71afe62ee..1147ed142e 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -67,46 +67,6 @@ static const float extmixlev_options[EXTMIXLEV_NUM_OPTIONS] = {
 };
 
 
-#define OFFSET(param) offsetof(AC3EncodeContext, options.param)
-#define AC3ENC_PARAM (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
-
-#define AC3ENC_TYPE_AC3_FIXED   0
-#define AC3ENC_TYPE_AC3         1
-#define AC3ENC_TYPE_EAC3        2
-
-#if CONFIG_AC3ENC_FLOAT
-#define AC3ENC_TYPE AC3ENC_TYPE_AC3
-#include "ac3enc_opts_template.c"
-static AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name,
-                                ac3_options, LIBAVUTIL_VERSION_INT };
-#undef AC3ENC_TYPE
-#define AC3ENC_TYPE AC3ENC_TYPE_EAC3
-#include "ac3enc_opts_template.c"
-static AVClass eac3enc_class = { "E-AC-3 Encoder", av_default_item_name,
-                                 eac3_options, LIBAVUTIL_VERSION_INT };
-#else
-#define AC3ENC_TYPE AC3ENC_TYPE_AC3_FIXED
-#include "ac3enc_opts_template.c"
-static AVClass ac3enc_class = { "Fixed-Point AC-3 Encoder", av_default_item_name,
-                                ac3fixed_options, LIBAVUTIL_VERSION_INT };
-#endif
-
-
-/* prototypes for functions in ac3enc_fixed.c and ac3enc_float.c */
-
-static av_cold void mdct_end(AC3MDCTContext *mdct);
-
-static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
-                             int nbits);
-
-static void apply_window(DSPContext *dsp, SampleType *output, const SampleType *input,
-                         const SampleType *window, unsigned int len);
-
-static int normalize_samples(AC3EncodeContext *s);
-
-static void scale_coefficients(AC3EncodeContext *s);
-
-
 /**
  * LUT for number of exponent groups.
  * exponent_group_tab[coupling][exponent strategy-1][number of coefficients]
@@ -117,7 +77,7 @@ static uint8_t exponent_group_tab[2][3][256];
 /**
  * List of supported channel layouts.
  */
-static const int64_t ac3_channel_layouts[] = {
+const int64_t ff_ac3_channel_layouts[19] = {
      AV_CH_LAYOUT_MONO,
      AV_CH_LAYOUT_STEREO,
      AV_CH_LAYOUT_2_1,
@@ -230,60 +190,6 @@ static void adjust_frame_size(AC3EncodeContext *s)
 }
 
 
-/**
- * Deinterleave input samples.
- * Channels are reordered from Libav's default order to AC-3 order.
- */
-static void deinterleave_input_samples(AC3EncodeContext *s,
-                                       const SampleType *samples)
-{
-    int ch, i;
-
-    /* deinterleave and remap input samples */
-    for (ch = 0; ch < s->channels; ch++) {
-        const SampleType *sptr;
-        int sinc;
-
-        /* copy last 256 samples of previous frame to the start of the current frame */
-        memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_FRAME_SIZE],
-               AC3_BLOCK_SIZE * sizeof(s->planar_samples[0][0]));
-
-        /* deinterleave */
-        sinc = s->channels;
-        sptr = samples + s->channel_map[ch];
-        for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i++) {
-            s->planar_samples[ch][i] = *sptr;
-            sptr += sinc;
-        }
-    }
-}
-
-
-/**
- * Apply the MDCT to input samples to generate frequency coefficients.
- * This applies the KBD window and normalizes the input to reduce precision
- * loss due to fixed-point calculations.
- */
-static void apply_mdct(AC3EncodeContext *s)
-{
-    int blk, ch;
-
-    for (ch = 0; ch < s->channels; ch++) {
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-            AC3Block *block = &s->blocks[blk];
-            const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE];
-
-            apply_window(&s->dsp, s->windowed_samples, input_samples, s->mdct->window, AC3_WINDOW_SIZE);
-
-            block->coeff_shift[ch+1] = normalize_samples(s);
-
-            s->mdct->fft.mdct_calcw(&s->mdct->fft, block->mdct_coef[ch+1],
-                                    s->windowed_samples);
-        }
-    }
-}
-
-
 static void compute_coupling_strategy(AC3EncodeContext *s)
 {
     int blk, ch;
@@ -345,296 +251,6 @@ static void compute_coupling_strategy(AC3EncodeContext *s)
 }
 
 
-/**
- * Calculate a single coupling coordinate.
- */
-static inline float calc_cpl_coord(float energy_ch, float energy_cpl)
-{
-    float coord = 0.125;
-    if (energy_cpl > 0)
-        coord *= sqrtf(energy_ch / energy_cpl);
-    return coord;
-}
-
-
-/**
- * Calculate coupling channel and coupling coordinates.
- * TODO: Currently this is only used for the floating-point encoder. I was
- *       able to make it work for the fixed-point encoder, but quality was
- *       generally lower in most cases than not using coupling. If a more
- *       adaptive coupling strategy were to be implemented it might be useful
- *       at that time to use coupling for the fixed-point encoder as well.
- */
-static void apply_channel_coupling(AC3EncodeContext *s)
-{
-#if CONFIG_AC3ENC_FLOAT
-    LOCAL_ALIGNED_16(float,   cpl_coords,       [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
-    LOCAL_ALIGNED_16(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
-    int blk, ch, bnd, i, j;
-    CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
-    int num_cpl_coefs = s->num_cpl_subbands * 12;
-
-    memset(cpl_coords,       0, AC3_MAX_BLOCKS * sizeof(*cpl_coords));
-    memset(fixed_cpl_coords, 0, AC3_MAX_BLOCKS * sizeof(*fixed_cpl_coords));
-
-    /* calculate coupling channel from fbw channels */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-        AC3Block *block = &s->blocks[blk];
-        CoefType *cpl_coef = &block->mdct_coef[CPL_CH][s->start_freq[CPL_CH]];
-        if (!block->cpl_in_use)
-            continue;
-        memset(cpl_coef-1, 0, (num_cpl_coefs+4) * sizeof(*cpl_coef));
-        for (ch = 1; ch <= s->fbw_channels; ch++) {
-            CoefType *ch_coef = &block->mdct_coef[ch][s->start_freq[CPL_CH]];
-            if (!block->channel_in_cpl[ch])
-                continue;
-            for (i = 0; i < num_cpl_coefs; i++)
-                cpl_coef[i] += ch_coef[i];
-        }
-        /* note: coupling start bin % 4 will always be 1 and num_cpl_coefs
-                 will always be a multiple of 12, so we need to subtract 1 from
-                 the start and add 4 to the length when using optimized
-                 functions which require 16-byte alignment. */
-
-        /* coefficients must be clipped to +/- 1.0 in order to be encoded */
-        s->dsp.vector_clipf(cpl_coef-1, cpl_coef-1, -1.0f, 1.0f, num_cpl_coefs+4);
-
-        /* scale coupling coefficients from float to 24-bit fixed-point */
-        s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][s->start_freq[CPL_CH]-1],
-                                   cpl_coef-1, num_cpl_coefs+4);
-    }
-
-    /* calculate energy in each band in coupling channel and each fbw channel */
-    /* TODO: possibly use SIMD to speed up energy calculation */
-    bnd = 0;
-    i = s->start_freq[CPL_CH];
-    while (i < s->cpl_end_freq) {
-        int band_size = s->cpl_band_sizes[bnd];
-        for (ch = CPL_CH; ch <= s->fbw_channels; ch++) {
-            for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-                AC3Block *block = &s->blocks[blk];
-                if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch]))
-                    continue;
-                for (j = 0; j < band_size; j++) {
-                    CoefType v = block->mdct_coef[ch][i+j];
-                    MAC_COEF(energy[blk][ch][bnd], v, v);
-                }
-            }
-        }
-        i += band_size;
-        bnd++;
-    }
-
-    /* determine which blocks to send new coupling coordinates for */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-        AC3Block *block  = &s->blocks[blk];
-        AC3Block *block0 = blk ? &s->blocks[blk-1] : NULL;
-        int new_coords = 0;
-        CoefSumType coord_diff[AC3_MAX_CHANNELS] = {0,};
-
-        if (block->cpl_in_use) {
-            /* calculate coupling coordinates for all blocks and calculate the
-               average difference between coordinates in successive blocks */
-            for (ch = 1; ch <= s->fbw_channels; ch++) {
-                if (!block->channel_in_cpl[ch])
-                    continue;
-
-                for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
-                    cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy[blk][ch][bnd],
-                                                              energy[blk][CPL_CH][bnd]);
-                    if (blk > 0 && block0->cpl_in_use &&
-                        block0->channel_in_cpl[ch]) {
-                        coord_diff[ch] += fabs(cpl_coords[blk-1][ch][bnd] -
-                                               cpl_coords[blk  ][ch][bnd]);
-                    }
-                }
-                coord_diff[ch] /= s->num_cpl_bands;
-            }
-
-            /* send new coordinates if this is the first block, if previous
-             * block did not use coupling but this block does, the channels
-             * using coupling has changed from the previous block, or the
-             * coordinate difference from the last block for any channel is
-             * greater than a threshold value. */
-            if (blk == 0) {
-                new_coords = 1;
-            } else if (!block0->cpl_in_use) {
-                new_coords = 1;
-            } else {
-                for (ch = 1; ch <= s->fbw_channels; ch++) {
-                    if (block->channel_in_cpl[ch] && !block0->channel_in_cpl[ch]) {
-                        new_coords = 1;
-                        break;
-                    }
-                }
-                if (!new_coords) {
-                    for (ch = 1; ch <= s->fbw_channels; ch++) {
-                        if (block->channel_in_cpl[ch] && coord_diff[ch] > 0.04) {
-                            new_coords = 1;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-        block->new_cpl_coords = new_coords;
-    }
-
-    /* calculate final coupling coordinates, taking into account reusing of
-       coordinates in successive blocks */
-    for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
-        blk = 0;
-        while (blk < AC3_MAX_BLOCKS) {
-            int blk1;
-            CoefSumType energy_cpl;
-            AC3Block *block  = &s->blocks[blk];
-
-            if (!block->cpl_in_use) {
-                blk++;
-                continue;
-            }
-
-            energy_cpl = energy[blk][CPL_CH][bnd];
-            blk1 = blk+1;
-            while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
-                if (s->blocks[blk1].cpl_in_use)
-                    energy_cpl += energy[blk1][CPL_CH][bnd];
-                blk1++;
-            }
-
-            for (ch = 1; ch <= s->fbw_channels; ch++) {
-                CoefType energy_ch;
-                if (!block->channel_in_cpl[ch])
-                    continue;
-                energy_ch = energy[blk][ch][bnd];
-                blk1 = blk+1;
-                while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
-                    if (s->blocks[blk1].cpl_in_use)
-                        energy_ch += energy[blk1][ch][bnd];
-                    blk1++;
-                }
-                cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy_ch, energy_cpl);
-            }
-            blk = blk1;
-        }
-    }
-
-    /* calculate exponents/mantissas for coupling coordinates */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-        AC3Block *block = &s->blocks[blk];
-        if (!block->cpl_in_use || !block->new_cpl_coords)
-            continue;
-
-        s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1],
-                                   cpl_coords[blk][1],
-                                   s->fbw_channels * 16);
-        s->ac3dsp.extract_exponents(block->cpl_coord_exp[1],
-                                    fixed_cpl_coords[blk][1],
-                                    s->fbw_channels * 16);
-
-        for (ch = 1; ch <= s->fbw_channels; ch++) {
-            int bnd, min_exp, max_exp, master_exp;
-
-            /* determine master exponent */
-            min_exp = max_exp = block->cpl_coord_exp[ch][0];
-            for (bnd = 1; bnd < s->num_cpl_bands; bnd++) {
-                int exp = block->cpl_coord_exp[ch][bnd];
-                min_exp = FFMIN(exp, min_exp);
-                max_exp = FFMAX(exp, max_exp);
-            }
-            master_exp = ((max_exp - 15) + 2) / 3;
-            master_exp = FFMAX(master_exp, 0);
-            while (min_exp < master_exp * 3)
-                master_exp--;
-            for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
-                block->cpl_coord_exp[ch][bnd] = av_clip(block->cpl_coord_exp[ch][bnd] -
-                                                        master_exp * 3, 0, 15);
-            }
-            block->cpl_master_exp[ch] = master_exp;
-
-            /* quantize mantissas */
-            for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
-                int cpl_exp  = block->cpl_coord_exp[ch][bnd];
-                int cpl_mant = (fixed_cpl_coords[blk][ch][bnd] << (5 + cpl_exp + master_exp * 3)) >> 24;
-                if (cpl_exp == 15)
-                    cpl_mant >>= 1;
-                else
-                    cpl_mant -= 16;
-
-                block->cpl_coord_mant[ch][bnd] = cpl_mant;
-            }
-        }
-    }
-
-    if (CONFIG_EAC3_ENCODER && s->eac3)
-        ff_eac3_set_cpl_states(s);
-#endif /* CONFIG_AC3ENC_FLOAT */
-}
-
-
-/**
- * Determine rematrixing flags for each block and band.
- */
-static void compute_rematrixing_strategy(AC3EncodeContext *s)
-{
-    int nb_coefs;
-    int blk, bnd, i;
-    AC3Block *block, *block0;
-
-    if (s->channel_mode != AC3_CHMODE_STEREO)
-        return;
-
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-        block = &s->blocks[blk];
-        block->new_rematrixing_strategy = !blk;
-
-        if (!s->rematrixing_enabled) {
-            block0 = block;
-            continue;
-        }
-
-        block->num_rematrixing_bands = 4;
-        if (block->cpl_in_use) {
-            block->num_rematrixing_bands -= (s->start_freq[CPL_CH] <= 61);
-            block->num_rematrixing_bands -= (s->start_freq[CPL_CH] == 37);
-            if (blk && block->num_rematrixing_bands != block0->num_rematrixing_bands)
-                block->new_rematrixing_strategy = 1;
-        }
-        nb_coefs = FFMIN(block->end_freq[1], block->end_freq[2]);
-
-        for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) {
-            /* calculate calculate sum of squared coeffs for one band in one block */
-            int start = ff_ac3_rematrix_band_tab[bnd];
-            int end   = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]);
-            CoefSumType sum[4] = {0,};
-            for (i = start; i < end; i++) {
-                CoefType lt = block->mdct_coef[1][i];
-                CoefType rt = block->mdct_coef[2][i];
-                CoefType md = lt + rt;
-                CoefType sd = lt - rt;
-                MAC_COEF(sum[0], lt, lt);
-                MAC_COEF(sum[1], rt, rt);
-                MAC_COEF(sum[2], md, md);
-                MAC_COEF(sum[3], sd, sd);
-            }
-
-            /* compare sums to determine if rematrixing will be used for this band */
-            if (FFMIN(sum[2], sum[3]) < FFMIN(sum[0], sum[1]))
-                block->rematrixing_flags[bnd] = 1;
-            else
-                block->rematrixing_flags[bnd] = 0;
-
-            /* determine if new rematrixing flags will be sent */
-            if (blk &&
-                block->rematrixing_flags[bnd] != block0->rematrixing_flags[bnd]) {
-                block->new_rematrixing_strategy = 1;
-            }
-        }
-        block0 = block;
-    }
-}
-
-
 /**
  * Apply stereo rematrixing to coefficients based on rematrixing flags.
  */
@@ -1467,7 +1083,7 @@ static int compute_bit_allocation(AC3EncodeContext *s)
         if (s->cpl_on) {
             s->cpl_on = 0;
             compute_coupling_strategy(s);
-            compute_rematrixing_strategy(s);
+            s->compute_rematrixing_strategy(s);
             apply_rematrixing(s);
             process_exponents(s);
             ret = compute_bit_allocation(s);
@@ -2262,8 +1878,8 @@ static int validate_metadata(AVCodecContext *avctx)
 /**
  * Encode a single AC-3 frame.
  */
-static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,
-                            int buf_size, void *data)
+int ff_ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,
+                        int buf_size, void *data)
 {
     AC3EncodeContext *s = avctx->priv_data;
     const SampleType *samples = data;
@@ -2278,19 +1894,19 @@ static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,
     if (s->bit_alloc.sr_code == 1 || s->eac3)
         adjust_frame_size(s);
 
-    deinterleave_input_samples(s, samples);
+    s->deinterleave_input_samples(s, samples);
 
-    apply_mdct(s);
+    s->apply_mdct(s);
 
-    scale_coefficients(s);
+    s->scale_coefficients(s);
 
     s->cpl_on = s->cpl_enabled;
     compute_coupling_strategy(s);
 
     if (s->cpl_on)
-        apply_channel_coupling(s);
+        s->apply_channel_coupling(s);
 
-    compute_rematrixing_strategy(s);
+    s->compute_rematrixing_strategy(s);
 
     apply_rematrixing(s);
 
@@ -2313,7 +1929,7 @@ static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,
 /**
  * Finalize encoding and free any memory allocated by the encoder.
  */
-static av_cold int ac3_encode_close(AVCodecContext *avctx)
+av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
 {
     int blk, ch;
     AC3EncodeContext *s = avctx->priv_data;
@@ -2344,7 +1960,7 @@ static av_cold int ac3_encode_close(AVCodecContext *avctx)
         av_freep(&block->qmant);
     }
 
-    mdct_end(s->mdct);
+    s->mdct_end(s->mdct);
     av_freep(&s->mdct);
 
     av_freep(&avctx->coded_frame);
@@ -2515,8 +2131,7 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
                              (s->channel_mode == AC3_CHMODE_STEREO);
 
     s->cpl_enabled = s->options.channel_coupling &&
-                     s->channel_mode >= AC3_CHMODE_STEREO &&
-                     CONFIG_AC3ENC_FLOAT;
+                     s->channel_mode >= AC3_CHMODE_STEREO && !s->fixed_point;
 
     return 0;
 }
@@ -2674,7 +2289,7 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
         }
     }
 
-    if (CONFIG_AC3ENC_FLOAT) {
+    if (!s->fixed_point) {
         FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels *
                           AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail);
         for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
@@ -2703,7 +2318,7 @@ alloc_fail:
 /**
  * Initialize the encoder.
  */
-static av_cold int ac3_encode_init(AVCodecContext *avctx)
+av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
 {
     AC3EncodeContext *s = avctx->priv_data;
     int ret, frame_size_58;
@@ -2734,6 +2349,27 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
     }
 
     /* set function pointers */
+    if (CONFIG_AC3_FIXED_ENCODER && s->fixed_point) {
+        s->mdct_end                     = ff_ac3_fixed_mdct_end;
+        s->mdct_init                    = ff_ac3_fixed_mdct_init;
+        s->apply_window                 = ff_ac3_fixed_apply_window;
+        s->normalize_samples            = ff_ac3_fixed_normalize_samples;
+        s->scale_coefficients           = ff_ac3_fixed_scale_coefficients;
+        s->deinterleave_input_samples   = ff_ac3_fixed_deinterleave_input_samples;
+        s->apply_mdct                   = ff_ac3_fixed_apply_mdct;
+        s->apply_channel_coupling       = ff_ac3_fixed_apply_channel_coupling;
+        s->compute_rematrixing_strategy = ff_ac3_fixed_compute_rematrixing_strategy;
+    } else if (CONFIG_AC3_ENCODER || CONFIG_EAC3_ENCODER) {
+        s->mdct_end                     = ff_ac3_float_mdct_end;
+        s->mdct_init                    = ff_ac3_float_mdct_init;
+        s->apply_window                 = ff_ac3_float_apply_window;
+        s->normalize_samples            = ff_ac3_float_normalize_samples;
+        s->scale_coefficients           = ff_ac3_float_scale_coefficients;
+        s->deinterleave_input_samples   = ff_ac3_float_deinterleave_input_samples;
+        s->apply_mdct                   = ff_ac3_float_apply_mdct;
+        s->apply_channel_coupling       = ff_ac3_float_apply_channel_coupling;
+        s->compute_rematrixing_strategy = ff_ac3_float_compute_rematrixing_strategy;
+    }
     if (CONFIG_EAC3_ENCODER && s->eac3)
         s->output_frame_header = ff_eac3_output_frame_header;
     else
@@ -2746,7 +2382,7 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
     bit_alloc_init(s);
 
     FF_ALLOCZ_OR_GOTO(avctx, s->mdct, sizeof(AC3MDCTContext), init_fail);
-    ret = mdct_init(avctx, s->mdct, 9);
+    ret = s->mdct_init(avctx, s->mdct, 9);
     if (ret)
         goto init_fail;
 
@@ -2763,6 +2399,6 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
 
     return 0;
 init_fail:
-    ac3_encode_close(avctx);
+    ff_ac3_encode_close(avctx);
     return ret;
 }
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index ccdb963a7c..e9d7e0a83a 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -40,18 +40,28 @@
 #define CONFIG_AC3ENC_FLOAT 0
 #endif
 
+#define OFFSET(param) offsetof(AC3EncodeContext, options.param)
+#define AC3ENC_PARAM (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+
+#define AC3ENC_TYPE_AC3_FIXED   0
+#define AC3ENC_TYPE_AC3         1
+#define AC3ENC_TYPE_EAC3        2
+
 #if CONFIG_AC3ENC_FLOAT
+#define AC3_NAME(x) ff_ac3_float_ ## x
 #define MAC_COEF(d,a,b) ((d)+=(a)*(b))
 typedef float SampleType;
 typedef float CoefType;
 typedef float CoefSumType;
 #else
+#define AC3_NAME(x) ff_ac3_fixed_ ## x
 #define MAC_COEF(d,a,b) MAC64(d,a,b)
 typedef int16_t SampleType;
 typedef int32_t CoefType;
 typedef int64_t CoefSumType;
 #endif
 
+
 typedef struct AC3MDCTContext {
     const SampleType *window;           ///< MDCT window function
     FFTContext fft;                     ///< FFT context for MDCT calculation
@@ -132,6 +142,7 @@ typedef struct AC3EncodeContext {
 
     AC3Block blocks[AC3_MAX_BLOCKS];        ///< per-block info
 
+    int fixed_point;                        ///< indicates if fixed-point encoder is being used
     int eac3;                               ///< indicates if this is E-AC-3 vs. AC-3
     int bitstream_id;                       ///< bitstream id                           (bsid)
     int bitstream_mode;                     ///< bitstream mode                         (bsmod)
@@ -209,7 +220,75 @@ typedef struct AC3EncodeContext {
     uint8_t *ref_bap     [AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< bit allocation pointers (bap)
     int ref_bap_set;                                         ///< indicates if ref_bap pointers have been set
 
+    /* fixed vs. float function pointers */
+    void (*mdct_end)(AC3MDCTContext *mdct);
+    int  (*mdct_init)(AVCodecContext *avctx, AC3MDCTContext *mdct, int nbits);
+    void (*apply_window)(DSPContext *dsp, SampleType *output,
+                         const SampleType *input, const SampleType *window,
+                         unsigned int len);
+    int  (*normalize_samples)(struct AC3EncodeContext *s);
+    void (*scale_coefficients)(struct AC3EncodeContext *s);
+
+    /* fixed vs. float templated function pointers */
+    void (*deinterleave_input_samples)(struct AC3EncodeContext *s,
+                                       const SampleType *samples);
+    void (*apply_mdct)(struct AC3EncodeContext *s);
+    void (*apply_channel_coupling)(struct AC3EncodeContext *s);
+    void (*compute_rematrixing_strategy)(struct AC3EncodeContext *s);
+
+    /* AC-3 vs. E-AC-3 function pointers */
     void (*output_frame_header)(struct AC3EncodeContext *s);
 } AC3EncodeContext;
 
+
+extern const int64_t ff_ac3_channel_layouts[19];
+
+int ff_ac3_encode_init(AVCodecContext *avctx);
+
+int ff_ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,
+                        int buf_size, void *data);
+
+int ff_ac3_encode_close(AVCodecContext *avctx);
+
+
+/* prototypes for functions in ac3enc_fixed.c and ac3enc_float.c */
+
+void ff_ac3_fixed_mdct_end(AC3MDCTContext *mdct);
+void ff_ac3_float_mdct_end(AC3MDCTContext *mdct);
+
+int ff_ac3_fixed_mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
+                           int nbits);
+int ff_ac3_float_mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
+                           int nbits);
+
+void ff_ac3_fixed_apply_window(DSPContext *dsp, SampleType *output,
+                               const SampleType *input,
+                               const SampleType *window, unsigned int len);
+void ff_ac3_float_apply_window(DSPContext *dsp, SampleType *output,
+                               const SampleType *input,
+                               const SampleType *window, unsigned int len);
+
+int ff_ac3_fixed_normalize_samples(AC3EncodeContext *s);
+int ff_ac3_float_normalize_samples(AC3EncodeContext *s);
+
+void ff_ac3_fixed_scale_coefficients(AC3EncodeContext *s);
+void ff_ac3_float_scale_coefficients(AC3EncodeContext *s);
+
+
+/* prototypes for functions in ac3enc_template.c */
+
+void ff_ac3_fixed_deinterleave_input_samples(AC3EncodeContext *s,
+                                             const SampleType *samples);
+void ff_ac3_float_deinterleave_input_samples(AC3EncodeContext *s,
+                                             const SampleType *samples);
+
+void ff_ac3_fixed_apply_mdct(AC3EncodeContext *s);
+void ff_ac3_float_apply_mdct(AC3EncodeContext *s);
+
+void ff_ac3_fixed_apply_channel_coupling(AC3EncodeContext *s);
+void ff_ac3_float_apply_channel_coupling(AC3EncodeContext *s);
+
+void ff_ac3_fixed_compute_rematrixing_strategy(AC3EncodeContext *s);
+void ff_ac3_float_compute_rematrixing_strategy(AC3EncodeContext *s);
+
 #endif /* AVCODEC_AC3ENC_H */
diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c
index 035ebb3de9..0620a6ac1a 100644
--- a/libavcodec/ac3enc_fixed.c
+++ b/libavcodec/ac3enc_fixed.c
@@ -28,13 +28,20 @@
 
 #define CONFIG_FFT_FLOAT 0
 #undef CONFIG_AC3ENC_FLOAT
-#include "ac3enc.c"
+#include "ac3enc.h"
+
+#define AC3ENC_TYPE AC3ENC_TYPE_AC3_FIXED
+#include "ac3enc_opts_template.c"
+static AVClass ac3enc_class = { "Fixed-Point AC-3 Encoder", av_default_item_name,
+                                ac3fixed_options, LIBAVUTIL_VERSION_INT };
+
+#include "ac3enc_template.c"
 
 
 /**
  * Finalize MDCT and free allocated memory.
  */
-static av_cold void mdct_end(AC3MDCTContext *mdct)
+av_cold void AC3_NAME(mdct_end)(AC3MDCTContext *mdct)
 {
     ff_mdct_end(&mdct->fft);
 }
@@ -44,8 +51,8 @@ static av_cold void mdct_end(AC3MDCTContext *mdct)
  * Initialize MDCT tables.
  * @param nbits log2(MDCT size)
  */
-static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
-                             int nbits)
+av_cold int AC3_NAME(mdct_init)(AVCodecContext *avctx, AC3MDCTContext *mdct,
+                                int nbits)
 {
     int ret = ff_mdct_init(&mdct->fft, nbits, 0, -1.0);
     mdct->window = ff_ac3_window;
@@ -56,8 +63,9 @@ static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
 /**
  * Apply KBD window to input samples prior to MDCT.
  */
-static void apply_window(DSPContext *dsp, int16_t *output, const int16_t *input,
-                         const int16_t *window, unsigned int len)
+void AC3_NAME(apply_window)(DSPContext *dsp, int16_t *output,
+                            const int16_t *input, const int16_t *window,
+                            unsigned int len)
 {
     dsp->apply_window_int16(output, input, window, len);
 }
@@ -82,7 +90,7 @@ static int log2_tab(AC3EncodeContext *s, int16_t *src, int len)
  *
  * @return exponent shift
  */
-static int normalize_samples(AC3EncodeContext *s)
+int AC3_NAME(normalize_samples)(AC3EncodeContext *s)
 {
     int v = 14 - log2_tab(s, s->windowed_samples, AC3_WINDOW_SIZE);
     if (v > 0)
@@ -95,7 +103,7 @@ static int normalize_samples(AC3EncodeContext *s)
 /**
  * Scale MDCT coefficients to 25-bit signed fixed-point.
  */
-static void scale_coefficients(AC3EncodeContext *s)
+void AC3_NAME(scale_coefficients)(AC3EncodeContext *s)
 {
     int blk, ch;
 
@@ -109,17 +117,25 @@ static void scale_coefficients(AC3EncodeContext *s)
 }
 
 
+static av_cold int ac3_fixed_encode_init(AVCodecContext *avctx)
+{
+    AC3EncodeContext *s = avctx->priv_data;
+    s->fixed_point = 1;
+    return ff_ac3_encode_init(avctx);
+}
+
+
 AVCodec ff_ac3_fixed_encoder = {
     "ac3_fixed",
     AVMEDIA_TYPE_AUDIO,
     CODEC_ID_AC3,
     sizeof(AC3EncodeContext),
-    ac3_encode_init,
-    ac3_encode_frame,
-    ac3_encode_close,
+    ac3_fixed_encode_init,
+    ff_ac3_encode_frame,
+    ff_ac3_encode_close,
     NULL,
     .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
     .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
     .priv_class = &ac3enc_class,
-    .channel_layouts = ac3_channel_layouts,
+    .channel_layouts = ff_ac3_channel_layouts,
 };
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 012c31de5d..9c7e88ed1c 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -27,14 +27,25 @@
  */
 
 #define CONFIG_AC3ENC_FLOAT 1
-#include "ac3enc.c"
+#include "ac3enc.h"
+#include "eac3enc.h"
 #include "kbdwin.h"
 
 
+#if CONFIG_AC3_ENCODER
+#define AC3ENC_TYPE AC3ENC_TYPE_AC3
+#include "ac3enc_opts_template.c"
+static AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name,
+                                ac3_options, LIBAVUTIL_VERSION_INT };
+#endif
+
+#include "ac3enc_template.c"
+
+
 /**
  * Finalize MDCT and free allocated memory.
  */
-static av_cold void mdct_end(AC3MDCTContext *mdct)
+av_cold void ff_ac3_float_mdct_end(AC3MDCTContext *mdct)
 {
     ff_mdct_end(&mdct->fft);
     av_freep(&mdct->window);
@@ -45,8 +56,8 @@ static av_cold void mdct_end(AC3MDCTContext *mdct)
  * Initialize MDCT tables.
  * @param nbits log2(MDCT size)
  */
-static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
-                             int nbits)
+av_cold int ff_ac3_float_mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
+                                   int nbits)
 {
     float *window;
     int i, n, n2;
@@ -71,8 +82,9 @@ static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
 /**
  * Apply KBD window to input samples prior to MDCT.
  */
-static void apply_window(DSPContext *dsp, float *output, const float *input,
-                         const float *window, unsigned int len)
+void ff_ac3_float_apply_window(DSPContext *dsp, float *output,
+                               const float *input, const float *window,
+                               unsigned int len)
 {
     dsp->vector_fmul(output, input, window, len);
 }
@@ -81,7 +93,7 @@ static void apply_window(DSPContext *dsp, float *output, const float *input,
 /**
  * Normalize the input samples to use the maximum available precision.
  */
-static int normalize_samples(AC3EncodeContext *s)
+int ff_ac3_float_normalize_samples(AC3EncodeContext *s)
 {
     /* Normalization is not needed for floating-point samples, so just return 0 */
     return 0;
@@ -91,7 +103,7 @@ static int normalize_samples(AC3EncodeContext *s)
 /**
  * Scale MDCT coefficients from float to 24-bit fixed-point.
  */
-static void scale_coefficients(AC3EncodeContext *s)
+void ff_ac3_float_scale_coefficients(AC3EncodeContext *s)
 {
     int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS;
     s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer + chan_size,
@@ -106,29 +118,13 @@ AVCodec ff_ac3_encoder = {
     AVMEDIA_TYPE_AUDIO,
     CODEC_ID_AC3,
     sizeof(AC3EncodeContext),
-    ac3_encode_init,
-    ac3_encode_frame,
-    ac3_encode_close,
+    ff_ac3_encode_init,
+    ff_ac3_encode_frame,
+    ff_ac3_encode_close,
     NULL,
     .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
     .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
     .priv_class = &ac3enc_class,
-    .channel_layouts = ac3_channel_layouts,
-};
-#endif
-
-#if CONFIG_EAC3_ENCODER
-AVCodec ff_eac3_encoder = {
-    .name            = "eac3",
-    .type            = AVMEDIA_TYPE_AUDIO,
-    .id              = CODEC_ID_EAC3,
-    .priv_data_size  = sizeof(AC3EncodeContext),
-    .init            = ac3_encode_init,
-    .encode          = ac3_encode_frame,
-    .close           = ac3_encode_close,
-    .sample_fmts     = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
-    .long_name       = NULL_IF_CONFIG_SMALL("ATSC A/52 E-AC-3"),
-    .priv_class      = &eac3enc_class,
-    .channel_layouts = ac3_channel_layouts,
+    .channel_layouts = ff_ac3_channel_layouts,
 };
 #endif
diff --git a/libavcodec/ac3enc_opts_template.c b/libavcodec/ac3enc_opts_template.c
index e16e0d0878..39138a1083 100644
--- a/libavcodec/ac3enc_opts_template.c
+++ b/libavcodec/ac3enc_opts_template.c
@@ -19,6 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/opt.h"
+#include "ac3.h"
+
 #if AC3ENC_TYPE == AC3ENC_TYPE_AC3_FIXED
 static const AVOption ac3fixed_options[] = {
 #elif AC3ENC_TYPE == AC3ENC_TYPE_AC3
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
new file mode 100644
index 0000000000..d88fa225a1
--- /dev/null
+++ b/libavcodec/ac3enc_template.c
@@ -0,0 +1,377 @@
+/*
+ * AC-3 encoder float/fixed template
+ * Copyright (c) 2000 Fabrice Bellard
+ * Copyright (c) 2006-2011 Justin Ruggles <justin.ruggles@gmail.com>
+ * Copyright (c) 2006-2010 Prakash Punnoor <prakash@punnoor.de>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AC-3 encoder float/fixed template
+ */
+
+#include <stdint.h>
+
+#include "ac3enc.h"
+
+
+/**
+ * Deinterleave input samples.
+ * Channels are reordered from Libav's default order to AC-3 order.
+ */
+void AC3_NAME(deinterleave_input_samples)(AC3EncodeContext *s,
+                                          const SampleType *samples)
+{
+    int ch, i;
+
+    /* deinterleave and remap input samples, one output channel per iteration */
+    for (ch = 0; ch < s->channels; ch++) {
+        const SampleType *sptr;
+        int sinc;
+
+        /* carry the AC3_BLOCK_SIZE samples stored at offset AC3_FRAME_SIZE over to the start */
+        memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_FRAME_SIZE],
+               AC3_BLOCK_SIZE * sizeof(s->planar_samples[0][0]));
+
+        /* deinterleave: start at the mapped source channel, then step one interleaved frame at a time */
+        sinc = s->channels; /* input stride between consecutive samples of one channel */
+        sptr = samples + s->channel_map[ch];
+        for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i++) {
+            s->planar_samples[ch][i] = *sptr;
+            sptr += sinc;
+        }
+    }
+}
+
+
+/**
+ * Apply the MDCT to input samples to generate frequency coefficients.
+ * This applies the KBD window and normalizes the input to reduce precision
+ * loss due to fixed-point calculations.
+ */
+void AC3_NAME(apply_mdct)(AC3EncodeContext *s)
+{
+    int blk, ch;
+
+    for (ch = 0; ch < s->channels; ch++) {
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+            AC3Block *block = &s->blocks[blk];
+            const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE];
+            /* window the input into s->windowed_samples, a single buffer reused for every channel/block */
+            s->apply_window(&s->dsp, s->windowed_samples, input_samples,
+                            s->mdct->window, AC3_WINDOW_SIZE);
+
+            if (s->fixed_point) /* normalize_samples is only invoked for the fixed-point encoder */
+                block->coeff_shift[ch+1] = s->normalize_samples(s);
+            /* transform the windowed samples; per-channel block arrays are indexed by ch+1 here */
+            s->mdct->fft.mdct_calcw(&s->mdct->fft, block->mdct_coef[ch+1],
+                                    s->windowed_samples);
+        }
+    }
+}
+
+
+/**
+ * Calculate a single coupling coordinate.
+ */
+static inline float calc_cpl_coord(float energy_ch, float energy_cpl)
+{
+    float coord = 0.125; /* returned unchanged when energy_cpl is not positive */
+    if (energy_cpl > 0)  /* also keeps the division below away from a zero denominator */
+        coord *= sqrtf(energy_ch / energy_cpl);
+    return coord;
+}
+
+
+/**
+ * Calculate coupling channel and coupling coordinates.
+ * TODO: Currently this is only used for the floating-point encoder. I was
+ *       able to make it work for the fixed-point encoder, but quality was
+ *       generally lower in most cases than not using coupling. If a more
+ *       adaptive coupling strategy were to be implemented it might be useful
+ *       at that time to use coupling for the fixed-point encoder as well.
+ */
+void AC3_NAME(apply_channel_coupling)(AC3EncodeContext *s)
+{
+#if CONFIG_AC3ENC_FLOAT
+    LOCAL_ALIGNED_16(float,   cpl_coords,       [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
+    LOCAL_ALIGNED_16(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
+    int blk, ch, bnd, i, j;
+    CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
+    int num_cpl_coefs = s->num_cpl_subbands * 12;
+
+    memset(cpl_coords,       0, AC3_MAX_BLOCKS * sizeof(*cpl_coords));
+    memset(fixed_cpl_coords, 0, AC3_MAX_BLOCKS * sizeof(*fixed_cpl_coords));
+
+    /* calculate coupling channel from fbw channels */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        CoefType *cpl_coef = &block->mdct_coef[CPL_CH][s->start_freq[CPL_CH]];
+        if (!block->cpl_in_use)
+            continue;
+        memset(cpl_coef-1, 0, (num_cpl_coefs+4) * sizeof(*cpl_coef));
+        for (ch = 1; ch <= s->fbw_channels; ch++) {
+            CoefType *ch_coef = &block->mdct_coef[ch][s->start_freq[CPL_CH]];
+            if (!block->channel_in_cpl[ch])
+                continue;
+            for (i = 0; i < num_cpl_coefs; i++)
+                cpl_coef[i] += ch_coef[i];
+        }
+        /* note: coupling start bin % 4 will always be 1 and num_cpl_coefs
+                 will always be a multiple of 12, so we need to subtract 1 from
+                 the start and add 4 to the length when using optimized
+                 functions which require 16-byte alignment. */
+
+        /* coefficients must be clipped to +/- 1.0 in order to be encoded */
+        s->dsp.vector_clipf(cpl_coef-1, cpl_coef-1, -1.0f, 1.0f, num_cpl_coefs+4);
+
+        /* scale coupling coefficients from float to 24-bit fixed-point */
+        s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][s->start_freq[CPL_CH]-1],
+                                   cpl_coef-1, num_cpl_coefs+4);
+    }
+
+    /* calculate energy in each band in coupling channel and each fbw channel */
+    /* TODO: possibly use SIMD to speed up energy calculation */
+    bnd = 0;
+    i = s->start_freq[CPL_CH];
+    while (i < s->cpl_end_freq) {
+        int band_size = s->cpl_band_sizes[bnd];
+        for (ch = CPL_CH; ch <= s->fbw_channels; ch++) {
+            for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+                AC3Block *block = &s->blocks[blk];
+                if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch]))
+                    continue;
+                for (j = 0; j < band_size; j++) {
+                    CoefType v = block->mdct_coef[ch][i+j];
+                    MAC_COEF(energy[blk][ch][bnd], v, v);
+                }
+            }
+        }
+        i += band_size;
+        bnd++;
+    }
+
+    /* determine which blocks to send new coupling coordinates for */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block  = &s->blocks[blk];
+        AC3Block *block0 = blk ? &s->blocks[blk-1] : NULL;
+        int new_coords = 0;
+        CoefSumType coord_diff[AC3_MAX_CHANNELS] = {0,};
+
+        if (block->cpl_in_use) {
+            /* calculate coupling coordinates for all blocks and calculate the
+               average difference between coordinates in successive blocks */
+            for (ch = 1; ch <= s->fbw_channels; ch++) {
+                if (!block->channel_in_cpl[ch])
+                    continue;
+
+                for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                    cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy[blk][ch][bnd],
+                                                              energy[blk][CPL_CH][bnd]);
+                    if (blk > 0 && block0->cpl_in_use &&
+                        block0->channel_in_cpl[ch]) {
+                        coord_diff[ch] += fabs(cpl_coords[blk-1][ch][bnd] -
+                                               cpl_coords[blk  ][ch][bnd]);
+                    }
+                }
+                coord_diff[ch] /= s->num_cpl_bands;
+            }
+
+            /* send new coordinates if this is the first block, if previous
+             * block did not use coupling but this block does, the channels
+             * using coupling has changed from the previous block, or the
+             * coordinate difference from the last block for any channel is
+             * greater than a threshold value. */
+            if (blk == 0) {
+                new_coords = 1;
+            } else if (!block0->cpl_in_use) {
+                new_coords = 1;
+            } else {
+                for (ch = 1; ch <= s->fbw_channels; ch++) {
+                    if (block->channel_in_cpl[ch] && !block0->channel_in_cpl[ch]) {
+                        new_coords = 1;
+                        break;
+                    }
+                }
+                if (!new_coords) {
+                    for (ch = 1; ch <= s->fbw_channels; ch++) {
+                        if (block->channel_in_cpl[ch] && coord_diff[ch] > 0.04) {
+                            new_coords = 1;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+        block->new_cpl_coords = new_coords;
+    }
+
+    /* calculate final coupling coordinates, taking into account reusing of
+       coordinates in successive blocks */
+    for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+        blk = 0;
+        while (blk < AC3_MAX_BLOCKS) {
+            int blk1;
+            CoefSumType energy_cpl;
+            AC3Block *block  = &s->blocks[blk];
+
+            if (!block->cpl_in_use) {
+                blk++;
+                continue;
+            }
+
+            energy_cpl = energy[blk][CPL_CH][bnd];
+            blk1 = blk+1; /* may reach AC3_MAX_BLOCKS: test the bound before indexing blocks[] */
+            while (blk1 < AC3_MAX_BLOCKS && !s->blocks[blk1].new_cpl_coords) {
+                if (s->blocks[blk1].cpl_in_use)
+                    energy_cpl += energy[blk1][CPL_CH][bnd];
+                blk1++;
+            }
+
+            for (ch = 1; ch <= s->fbw_channels; ch++) {
+                CoefType energy_ch;
+                if (!block->channel_in_cpl[ch])
+                    continue;
+                energy_ch = energy[blk][ch][bnd];
+                blk1 = blk+1; /* same scan over the following blocks, again bound-checked first */
+                while (blk1 < AC3_MAX_BLOCKS && !s->blocks[blk1].new_cpl_coords) {
+                    if (s->blocks[blk1].cpl_in_use)
+                        energy_ch += energy[blk1][ch][bnd];
+                    blk1++;
+                }
+                cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy_ch, energy_cpl);
+            }
+            blk = blk1;
+        }
+    }
+
+    /* calculate exponents/mantissas for coupling coordinates */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        if (!block->cpl_in_use || !block->new_cpl_coords)
+            continue;
+
+        s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1],
+                                   cpl_coords[blk][1],
+                                   s->fbw_channels * 16);
+        s->ac3dsp.extract_exponents(block->cpl_coord_exp[1],
+                                    fixed_cpl_coords[blk][1],
+                                    s->fbw_channels * 16);
+
+        for (ch = 1; ch <= s->fbw_channels; ch++) {
+            int bnd, min_exp, max_exp, master_exp;
+
+            /* determine master exponent */
+            min_exp = max_exp = block->cpl_coord_exp[ch][0];
+            for (bnd = 1; bnd < s->num_cpl_bands; bnd++) {
+                int exp = block->cpl_coord_exp[ch][bnd];
+                min_exp = FFMIN(exp, min_exp);
+                max_exp = FFMAX(exp, max_exp);
+            }
+            master_exp = ((max_exp - 15) + 2) / 3;
+            master_exp = FFMAX(master_exp, 0);
+            while (min_exp < master_exp * 3)
+                master_exp--;
+            for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                block->cpl_coord_exp[ch][bnd] = av_clip(block->cpl_coord_exp[ch][bnd] -
+                                                        master_exp * 3, 0, 15);
+            }
+            block->cpl_master_exp[ch] = master_exp;
+
+            /* quantize mantissas */
+            for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                int cpl_exp  = block->cpl_coord_exp[ch][bnd];
+                int cpl_mant = (fixed_cpl_coords[blk][ch][bnd] << (5 + cpl_exp + master_exp * 3)) >> 24;
+                if (cpl_exp == 15)
+                    cpl_mant >>= 1;
+                else
+                    cpl_mant -= 16;
+
+                block->cpl_coord_mant[ch][bnd] = cpl_mant;
+            }
+        }
+    }
+
+    if (CONFIG_EAC3_ENCODER && s->eac3)
+        ff_eac3_set_cpl_states(s);
+#endif /* CONFIG_AC3ENC_FLOAT */
+}
+
+
+/**
+ * Determine rematrixing flags for each block and band.
+ */
+void AC3_NAME(compute_rematrixing_strategy)(AC3EncodeContext *s)
+{
+    int nb_coefs;
+    int blk, bnd, i;
+    AC3Block *block, *av_uninit(block0); /* block0: previous block, only read when blk > 0 */
+
+    if (s->channel_mode != AC3_CHMODE_STEREO) /* nothing to do unless the channel mode is stereo */
+        return;
+
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        block = &s->blocks[blk];
+        block->new_rematrixing_strategy = !blk; /* set for block 0; later blocks start at 0 */
+
+        if (!s->rematrixing_enabled) { /* keep tracking block0, leave bands and flags untouched */
+            block0 = block;
+            continue;
+        }
+
+        block->num_rematrixing_bands = 4;
+        if (block->cpl_in_use) { /* one band fewer if coupling starts at bin <= 61, another if at bin 37 */
+            block->num_rematrixing_bands -= (s->start_freq[CPL_CH] <= 61);
+            block->num_rematrixing_bands -= (s->start_freq[CPL_CH] == 37);
+            if (blk && block->num_rematrixing_bands != block0->num_rematrixing_bands)
+                block->new_rematrixing_strategy = 1;
+        }
+        nb_coefs = FFMIN(block->end_freq[1], block->end_freq[2]);
+
+        for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) {
+            /* calculate sum of squared coeffs for one band in one block */
+            int start = ff_ac3_rematrix_band_tab[bnd];
+            int end   = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]);
+            CoefSumType sum[4] = {0,};
+            for (i = start; i < end; i++) {
+                CoefType lt = block->mdct_coef[1][i];
+                CoefType rt = block->mdct_coef[2][i];
+                CoefType md = lt + rt; /* sum of the two channels */
+                CoefType sd = lt - rt; /* difference of the two channels */
+                MAC_COEF(sum[0], lt, lt);
+                MAC_COEF(sum[1], rt, rt);
+                MAC_COEF(sum[2], md, md);
+                MAC_COEF(sum[3], sd, sd);
+            }
+
+            /* rematrix this band when min(sum, difference) energy is below min(left, right) energy */
+            if (FFMIN(sum[2], sum[3]) < FFMIN(sum[0], sum[1]))
+                block->rematrixing_flags[bnd] = 1;
+            else
+                block->rematrixing_flags[bnd] = 0;
+
+            /* determine if new rematrixing flags will be sent */
+            if (blk &&
+                block->rematrixing_flags[bnd] != block0->rematrixing_flags[bnd]) {
+                block->new_rematrixing_strategy = 1;
+            }
+        }
+        block0 = block;
+    }
+}
diff --git a/libavcodec/eac3enc.c b/libavcodec/eac3enc.c
index 20f4b879c6..d37acaf20b 100644
--- a/libavcodec/eac3enc.c
+++ b/libavcodec/eac3enc.c
@@ -28,6 +28,13 @@
 #include "ac3enc.h"
 #include "eac3enc.h"
 
+
+#define AC3ENC_TYPE AC3ENC_TYPE_EAC3 /* selects which option table the template below defines */
+#include "ac3enc_opts_template.c"
+static AVClass eac3enc_class = { "E-AC-3 Encoder", av_default_item_name,
+                                 eac3_options, LIBAVUTIL_VERSION_INT }; /* used as ff_eac3_encoder.priv_class */
+
+
 void ff_eac3_set_cpl_states(AC3EncodeContext *s)
 {
     int ch, blk;
@@ -129,3 +136,20 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
     /* block start info */
     put_bits(&s->pb, 1, 0);
 }
+
+
+#if CONFIG_EAC3_ENCODER
+AVCodec ff_eac3_encoder = {
+    .name            = "eac3",
+    .type            = AVMEDIA_TYPE_AUDIO,
+    .id              = CODEC_ID_EAC3,
+    .priv_data_size  = sizeof(AC3EncodeContext),
+    .init            = ff_ac3_encode_init,  /* init/encode/close are the ff_ac3_* entry points, */
+    .encode          = ff_ac3_encode_frame, /* the same ones ff_ac3_encoder registers */
+    .close           = ff_ac3_encode_close,
+    .sample_fmts     = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
+    .long_name       = NULL_IF_CONFIG_SMALL("ATSC A/52 E-AC-3"),
+    .priv_class      = &eac3enc_class,
+    .channel_layouts = ff_ac3_channel_layouts,
+};
+#endif /* CONFIG_EAC3_ENCODER */

From 38c304addd978410956c8ff02ea83a6ffb9a606a Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Fri, 10 Jun 2011 15:17:55 -0400
Subject: [PATCH 794/830] ac3enc: remove empty ac3_float function that is never
 called

---
 libavcodec/ac3enc.c       |  1 -
 libavcodec/ac3enc.h       |  1 -
 libavcodec/ac3enc_float.c | 10 ----------
 3 files changed, 12 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 1147ed142e..78e81654e3 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -2363,7 +2363,6 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
         s->mdct_end                     = ff_ac3_float_mdct_end;
         s->mdct_init                    = ff_ac3_float_mdct_init;
         s->apply_window                 = ff_ac3_float_apply_window;
-        s->normalize_samples            = ff_ac3_float_normalize_samples;
         s->scale_coefficients           = ff_ac3_float_scale_coefficients;
         s->deinterleave_input_samples   = ff_ac3_float_deinterleave_input_samples;
         s->apply_mdct                   = ff_ac3_float_apply_mdct;
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index e9d7e0a83a..1d17484321 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -269,7 +269,6 @@ void ff_ac3_float_apply_window(DSPContext *dsp, SampleType *output,
                                const SampleType *window, unsigned int len);
 
 int ff_ac3_fixed_normalize_samples(AC3EncodeContext *s);
-int ff_ac3_float_normalize_samples(AC3EncodeContext *s);
 
 void ff_ac3_fixed_scale_coefficients(AC3EncodeContext *s);
 void ff_ac3_float_scale_coefficients(AC3EncodeContext *s);
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 9c7e88ed1c..43fbb954d6 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -90,16 +90,6 @@ void ff_ac3_float_apply_window(DSPContext *dsp, float *output,
 }
 
 
-/**
- * Normalize the input samples to use the maximum available precision.
- */
-int ff_ac3_float_normalize_samples(AC3EncodeContext *s)
-{
-    /* Normalization is not needed for floating-point samples, so just return 0 */
-    return 0;
-}
-
-
 /**
  * Scale MDCT coefficients from float to 24-bit fixed-point.
  */

From 35bdaf3d427b6856df01d41ee826bd515440ec46 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Fri, 10 Jun 2011 20:27:50 +0200
Subject: [PATCH 795/830] utils: Drop pointless '#if 1' preprocessor directive.

---
 libavformat/utils.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index d0fd0d46ff..60f4d03e4b 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1577,14 +1577,12 @@ int64_t av_gen_search(AVFormatContext *s, int stream_index, int64_t target_ts, i
 
     pos = (flags & AVSEEK_FLAG_BACKWARD) ? pos_min : pos_max;
     ts  = (flags & AVSEEK_FLAG_BACKWARD) ?  ts_min :  ts_max;
-#if 1
     pos_min = pos;
     ts_min = read_timestamp(s, stream_index, &pos_min, INT64_MAX);
     pos_min++;
     ts_max = read_timestamp(s, stream_index, &pos_min, INT64_MAX);
     av_dlog(s, "pos=0x%"PRIx64" %"PRId64"<=%"PRId64"<=%"PRId64"\n",
             pos, ts_min, target_ts, ts_max);
-#endif
     *ts_ret= ts;
     return pos;
 }

From 99477adc31c0569b3cebe8004dd584aa4726a2d1 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Mon, 13 Jun 2011 17:44:50 -0400
Subject: [PATCH 796/830] ac3enc: fix allocation of floating point samples.

sizeof(SampleType) is different for fixed and float encoders.
---
 libavcodec/ac3enc.c          | 16 +++++++---------
 libavcodec/ac3enc.h          |  5 +++++
 libavcodec/ac3enc_template.c | 20 ++++++++++++++++++++
 3 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 78e81654e3..3426bd252a 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -2215,15 +2215,9 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
     AC3EncodeContext *s = avctx->priv_data;
     int channels = s->channels + 1; /* includes coupling channel */
 
-    FF_ALLOC_OR_GOTO(avctx, s->windowed_samples, AC3_WINDOW_SIZE *
-                     sizeof(*s->windowed_samples), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->planar_samples, s->channels * sizeof(*s->planar_samples),
-                     alloc_fail);
-    for (ch = 0; ch < s->channels; ch++) {
-        FF_ALLOCZ_OR_GOTO(avctx, s->planar_samples[ch],
-                          (AC3_FRAME_SIZE+AC3_BLOCK_SIZE) * sizeof(**s->planar_samples),
-                          alloc_fail);
-    }
+    if (s->allocate_sample_buffers(s))
+        goto alloc_fail;
+
     FF_ALLOC_OR_GOTO(avctx, s->bap_buffer,  AC3_MAX_BLOCKS * channels *
                      AC3_MAX_COEFS * sizeof(*s->bap_buffer),  alloc_fail);
     FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * channels *
@@ -2323,6 +2317,8 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
     AC3EncodeContext *s = avctx->priv_data;
     int ret, frame_size_58;
 
+    s->avctx = avctx;
+
     s->eac3 = avctx->codec_id == CODEC_ID_EAC3;
 
     avctx->frame_size = AC3_FRAME_SIZE;
@@ -2355,6 +2351,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
         s->apply_window                 = ff_ac3_fixed_apply_window;
         s->normalize_samples            = ff_ac3_fixed_normalize_samples;
         s->scale_coefficients           = ff_ac3_fixed_scale_coefficients;
+        s->allocate_sample_buffers      = ff_ac3_fixed_allocate_sample_buffers;
         s->deinterleave_input_samples   = ff_ac3_fixed_deinterleave_input_samples;
         s->apply_mdct                   = ff_ac3_fixed_apply_mdct;
         s->apply_channel_coupling       = ff_ac3_fixed_apply_channel_coupling;
@@ -2364,6 +2361,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
         s->mdct_init                    = ff_ac3_float_mdct_init;
         s->apply_window                 = ff_ac3_float_apply_window;
         s->scale_coefficients           = ff_ac3_float_scale_coefficients;
+        s->allocate_sample_buffers      = ff_ac3_float_allocate_sample_buffers;
         s->deinterleave_input_samples   = ff_ac3_float_deinterleave_input_samples;
         s->apply_mdct                   = ff_ac3_float_apply_mdct;
         s->apply_channel_coupling       = ff_ac3_float_apply_channel_coupling;
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 1d17484321..bf25298940 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -135,6 +135,7 @@ typedef struct AC3Block {
 typedef struct AC3EncodeContext {
     AVClass *av_class;                      ///< AVClass used for AVOption
     AC3EncOptions options;                  ///< encoding options
+    AVCodecContext *avctx;                  ///< parent AVCodecContext
     PutBitContext pb;                       ///< bitstream writer context
     DSPContext dsp;
     AC3DSPContext ac3dsp;                   ///< AC-3 optimized functions
@@ -230,6 +231,7 @@ typedef struct AC3EncodeContext {
     void (*scale_coefficients)(struct AC3EncodeContext *s);
 
     /* fixed vs. float templated function pointers */
+    int  (*allocate_sample_buffers)(struct AC3EncodeContext *s);
     void (*deinterleave_input_samples)(struct AC3EncodeContext *s,
                                        const SampleType *samples);
     void (*apply_mdct)(struct AC3EncodeContext *s);
@@ -276,6 +278,9 @@ void ff_ac3_float_scale_coefficients(AC3EncodeContext *s);
 
 /* prototypes for functions in ac3enc_template.c */
 
+int ff_ac3_fixed_allocate_sample_buffers(AC3EncodeContext *s);
+int ff_ac3_float_allocate_sample_buffers(AC3EncodeContext *s);
+
 void ff_ac3_fixed_deinterleave_input_samples(AC3EncodeContext *s,
                                              const SampleType *samples);
 void ff_ac3_float_deinterleave_input_samples(AC3EncodeContext *s,
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index d88fa225a1..0547165aaf 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -31,6 +31,26 @@
 #include "ac3enc.h"
 
 
+int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s)
+{
+    int ch;
+    /* Allocate the window scratch buffer and one planar sample buffer per channel. */
+    FF_ALLOC_OR_GOTO(s->avctx, s->windowed_samples, AC3_WINDOW_SIZE *
+                     sizeof(*s->windowed_samples), alloc_fail);
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->planar_samples, s->channels * sizeof(*s->planar_samples),
+                      alloc_fail); /* zeroed so unfilled slots are NULL if a later allocation fails */
+    for (ch = 0; ch < s->channels; ch++) {
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->planar_samples[ch],
+                          (AC3_FRAME_SIZE+AC3_BLOCK_SIZE) * sizeof(**s->planar_samples),
+                          alloc_fail);
+    }
+
+    return 0;
+alloc_fail: /* nothing is released here; anything already allocated stays attached to s */
+    return AVERROR(ENOMEM);
+}
+
+
 /**
  * Deinterleave input samples.
  * Channels are reordered from Libav's default order to AC-3 order.

From f440f742052256a854ce2f81a52fa5cf305a895c Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs <aurel@gnuage.org>
Date: Tue, 14 Jun 2011 01:53:40 +0200
Subject: [PATCH 797/830] matroskaenc: ensure the written colorspace doesn't
 depend on host endianness

---
 libavformat/matroskaenc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 50e931e5c2..9e42019415 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -615,8 +615,10 @@ static int mkv_write_tracks(AVFormatContext *s)
                     put_ebml_uint(pb, MATROSKA_ID_VIDEODISPLAYUNIT, 3);
                 }
 
-                if (codec->codec_id == CODEC_ID_RAWVIDEO)
-                    put_ebml_binary(pb, MATROSKA_ID_VIDEOCOLORSPACE, &codec->codec_tag, 4);
+                if (codec->codec_id == CODEC_ID_RAWVIDEO) {
+                    uint32_t color_space = av_le2ne32(codec->codec_tag);
+                    put_ebml_binary(pb, MATROSKA_ID_VIDEOCOLORSPACE, &color_space, sizeof(color_space));
+                }
                 end_ebml_master(pb, subinfo);
                 break;
 

From 5fec3a2539b7c94a94fe891c3f7d92e6d5291682 Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs <aurel@gnuage.org>
Date: Tue, 14 Jun 2011 01:58:11 +0200
Subject: [PATCH 798/830] matroskadec: use a temporary fourcc variable

---
 libavformat/matroskadec.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index bd71befcf9..32b2c8efca 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1281,6 +1281,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
         uint8_t *extradata = NULL;
         int extradata_size = 0;
         int extradata_offset = 0;
+        uint32_t fourcc = 0;
         AVIOContext b;
 
         /* Apply some sanity checks. */
@@ -1302,6 +1303,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
                 track->video.display_width = track->video.pixel_width;
             if (!track->video.display_height)
                 track->video.display_height = track->video.pixel_height;
+            fourcc = track->video.fourcc;
         } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
             if (!track->audio.out_samplerate)
                 track->audio.out_samplerate = track->audio.samplerate;
@@ -1361,8 +1363,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
             && track->codec_priv.size >= 40
             && track->codec_priv.data != NULL) {
             track->ms_compat = 1;
-            track->video.fourcc = AV_RL32(track->codec_priv.data + 16);
-            codec_id = ff_codec_get_id(ff_codec_bmp_tags, track->video.fourcc);
+            fourcc = AV_RL32(track->codec_priv.data + 16);
+            codec_id = ff_codec_get_id(ff_codec_bmp_tags, fourcc);
             extradata_offset = 40;
         } else if (!strcmp(track->codec_id, "A_MS/ACM")
                    && track->codec_priv.size >= 14
@@ -1378,8 +1380,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
         } else if (!strcmp(track->codec_id, "V_QUICKTIME")
                    && (track->codec_priv.size >= 86)
                    && (track->codec_priv.data != NULL)) {
-            track->video.fourcc = AV_RL32(track->codec_priv.data);
-            codec_id=ff_codec_get_id(codec_movvideo_tags, track->video.fourcc);
+            fourcc = AV_RL32(track->codec_priv.data);
+            codec_id = ff_codec_get_id(codec_movvideo_tags, fourcc);
         } else if (codec_id == CODEC_ID_PCM_S16BE) {
             switch (track->audio.bitdepth) {
             case  8:  codec_id = CODEC_ID_PCM_U8;     break;
@@ -1500,7 +1502,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
             MatroskaTrackPlane *planes = track->operation.combine_planes.elem;
 
             st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
-            st->codec->codec_tag  = track->video.fourcc;
+            st->codec->codec_tag  = fourcc;
             st->codec->width  = track->video.pixel_width;
             st->codec->height = track->video.pixel_height;
             av_reduce(&st->sample_aspect_ratio.num,

From fdb5e02901111a6a53f8386d82afae0aa2d746a7 Mon Sep 17 00:00:00 2001
From: Aurelien Jacobs <aurel@gnuage.org>
Date: Tue, 14 Jun 2011 02:00:06 +0200
Subject: [PATCH 799/830] matroskadec: properly decode color space in an endian
 neutral way

---
 libavformat/matroskadec.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 32b2c8efca..6e951bac18 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -112,7 +112,7 @@ typedef struct {
     uint64_t display_height;
     uint64_t pixel_width;
     uint64_t pixel_height;
-    uint64_t fourcc;
+    EbmlBin color_space;
     uint64_t stereo_mode;
 } MatroskaTrackVideo;
 
@@ -302,7 +302,7 @@ static EbmlSyntax matroska_track_video[] = {
     { MATROSKA_ID_VIDEODISPLAYHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_height) },
     { MATROSKA_ID_VIDEOPIXELWIDTH,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
     { MATROSKA_ID_VIDEOPIXELHEIGHT,   EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
-    { MATROSKA_ID_VIDEOCOLORSPACE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) },
+    { MATROSKA_ID_VIDEOCOLORSPACE,    EBML_BIN,  0, offsetof(MatroskaTrackVideo,color_space) },
     { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,stereo_mode) },
     { MATROSKA_ID_VIDEOPIXELCROPB,    EBML_NONE },
     { MATROSKA_ID_VIDEOPIXELCROPT,    EBML_NONE },
@@ -1303,7 +1303,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
                 track->video.display_width = track->video.pixel_width;
             if (!track->video.display_height)
                 track->video.display_height = track->video.pixel_height;
-            fourcc = track->video.fourcc;
+            if (track->video.color_space.size == 4)
+                fourcc = AV_RL32(track->video.color_space.data);
         } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
             if (!track->audio.out_samplerate)
                 track->audio.out_samplerate = track->audio.samplerate;

From c90b94424cd4953a095d6d6648ba8d499e306b35 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Fri, 3 Jun 2011 01:12:28 -0700
Subject: [PATCH 800/830] 4:4:4 H.264 decoding support

Note: this is 4:4:4 from the 2007 spec revision, not the previous (now deprecated) 4:4:4 mode in H.264.
---
 libavcodec/arm/h264dsp_init_arm.c  |   3 +-
 libavcodec/dsputil.h               |   2 +-
 libavcodec/dsputil_template.c      |   6 +-
 libavcodec/h264.c                  | 760 +++++++++++++++++---------
 libavcodec/h264.h                  | 181 ++++---
 libavcodec/h264_cabac.c            | 819 +++++++++++++++++++++++++----
 libavcodec/h264_cavlc.c            | 198 ++++---
 libavcodec/h264_loopfilter.c       |  70 ++-
 libavcodec/h264_ps.c               |  14 +-
 libavcodec/h264dsp.h               |   8 +-
 libavcodec/h264idct_template.c     |  44 +-
 libavcodec/mpegvideo.c             |  25 +-
 libavcodec/ppc/h264_altivec.c      |  22 +-
 libavcodec/snow.c                  |   6 +-
 libavcodec/x86/dsputil_mmx.c       |   4 +-
 libavcodec/x86/h264_i386.h         |  15 +-
 libavcodec/x86/h264_idct.asm       |  44 +-
 libavcodec/x86/h264_idct_10bit.asm |  35 +-
 18 files changed, 1609 insertions(+), 647 deletions(-)

diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index c2399e50ff..483b26ab02 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -122,7 +122,8 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
     c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
     c->h264_idct_add16      = ff_h264_idct_add16_neon;
     c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
-    c->h264_idct_add8       = ff_h264_idct_add8_neon;
+    //FIXME: reenable when asm is updated.
+    //c->h264_idct_add8       = ff_h264_idct_add8_neon;
     c->h264_idct8_add       = ff_h264_idct8_add_neon;
     c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
     c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index cfc574aebb..7a28b06fd5 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -507,7 +507,7 @@ typedef struct DSPContext {
 #define BASIS_SHIFT 16
 #define RECON_SHIFT 6
 
-    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int sides);
+    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides);
 #define EDGE_WIDTH 16
 #define EDGE_TOP    1
 #define EDGE_BOTTOM 2
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index 8ca6d3e414..b85931856a 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -79,7 +79,7 @@ static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstS
 
 /* draw the edges of width 'w' of an image of size width, height */
 //FIXME check that this is ok for mpeg4 interlaced
-static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int sides)
+static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int h, int sides)
 {
     pixel *buf = (pixel*)_buf;
     int wrap = _wrap / sizeof(pixel);
@@ -106,10 +106,10 @@ static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, i
     buf -= w;
     last_line = buf + (height - 1) * wrap;
     if (sides & EDGE_TOP)
-        for(i = 0; i < w; i++)
+        for(i = 0; i < h; i++)
             memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
     if (sides & EDGE_BOTTOM)
-        for (i = 0; i < w; i++)
+        for (i = 0; i < h; i++)
             memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
 }
 
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 276d6e6d6c..86ea218807 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -451,12 +451,13 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int src_x_offset, int src_y_offset,
                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
-                           int pixel_shift){
+                           int pixel_shift, int chroma444){
     MpegEncContext * const s = &h->s;
     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
     const int luma_xy= (mx&3) + ((my&3)<<2);
-    uint8_t * src_y = pic->data[0] + ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
+    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
+    uint8_t * src_y = pic->data[0] + offset;
     uint8_t * src_cb, * src_cr;
     int extra_width= h->emu_edge_width;
     int extra_height= h->emu_edge_height;
@@ -483,6 +484,31 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
     }
 
+    if(chroma444){
+        src_cb = pic->data[1] + offset;
+        if(emu){
+            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
+                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
+        }
+        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
+        if(!square){
+            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
+        }
+
+        src_cr = pic->data[2] + offset;
+        if(emu){
+            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
+                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
+        }
+        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
+        if(!square){
+            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
+        }
+        return;
+    }
+
     if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
 
     if(MB_FIELD){
@@ -511,14 +537,19 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
                            int x_offset, int y_offset,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
-                           int list0, int list1, int pixel_shift){
+                           int list0, int list1, int pixel_shift, int chroma444){
     MpegEncContext * const s = &h->s;
     qpel_mc_func *qpix_op=  qpix_put;
     h264_chroma_mc_func chroma_op= chroma_put;
 
-    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->  mb_linesize;
-    dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
-    dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+    if(chroma444){
+        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+    }else{
+        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    }
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -526,7 +557,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op, pixel_shift);
+                           qpix_op, chroma_op, pixel_shift, chroma444);
 
         qpix_op=  qpix_avg;
         chroma_op= chroma_avg;
@@ -536,7 +567,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op, pixel_shift);
+                           qpix_op, chroma_op, pixel_shift, chroma444);
     }
 }
 
@@ -546,12 +577,19 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
-                           int list0, int list1, int pixel_shift){
+                           int list0, int list1, int pixel_shift, int chroma444){
     MpegEncContext * const s = &h->s;
 
-    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->  mb_linesize;
-    dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
-    dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+    if(chroma444){
+        chroma_weight_avg = luma_weight_avg;
+        chroma_weight_op = luma_weight_op;
+        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
+    }else{
+        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    }
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -559,17 +597,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         /* don't optimize for luma-only case, since B-frames usually
          * use implicit weights => chroma too. */
         uint8_t *tmp_cb = s->obmc_scratchpad;
-        uint8_t *tmp_cr = s->obmc_scratchpad + (8 << pixel_shift);
-        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
+        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
+        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
         int refn0 = h->ref_cache[0][ scan8[n] ];
         int refn1 = h->ref_cache[1][ scan8[n] ];
 
         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                     dest_y, dest_cb, dest_cr,
-                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
+                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                     tmp_y, tmp_cb, tmp_cr,
-                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
+                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
 
         if(h->use_weight == 2){
             int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
@@ -594,7 +632,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         Picture *ref= &h->ref_list[list][refn];
         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                    qpix_put, chroma_put, pixel_shift);
+                    qpix_put, chroma_put, pixel_shift, chroma444);
 
         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                        h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
@@ -613,21 +651,21 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
-                           int list0, int list1, int pixel_shift){
+                           int list0, int list1, int pixel_shift, int chroma444){
     if((h->use_weight==2 && list0 && list1
         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
        || h->use_weight==1)
         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                          x_offset, y_offset, qpix_put, chroma_put,
                          weight_op[0], weight_op[3], weight_avg[0],
-                         weight_avg[3], list0, list1, pixel_shift);
+                         weight_avg[3], list0, list1, pixel_shift, chroma444);
     else
         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
-                    chroma_avg, list0, list1, pixel_shift);
+                    chroma_avg, list0, list1, pixel_shift, chroma444);
 }
 
-static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){
+static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
     /* fetch pixels for estimated mv 4 macroblocks ahead
      * optimized for 64byte cache lines */
     MpegEncContext * const s = &h->s;
@@ -638,8 +676,13 @@ static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){
         uint8_t **src= h->ref_list[list][refn].data;
         int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
         s->dsp.prefetch(src[0]+off, s->linesize, 4);
-        off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
-        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
+        if(chroma444){
+            s->dsp.prefetch(src[1]+off, s->linesize, 4);
+            s->dsp.prefetch(src[2]+off, s->linesize, 4);
+        }else{
+            off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
+            s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
+        }
     }
 }
 
@@ -647,7 +690,7 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                       h264_weight_func *weight_op, h264_biweight_func *weight_avg,
-                      int pixel_shift){
+                      int pixel_shift, int chroma444){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
@@ -656,36 +699,36 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
 
     if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
         await_references(h);
-    prefetch_motion(h, 0, pixel_shift);
+    prefetch_motion(h, 0, pixel_shift, chroma444);
 
     if(IS_16X16(mb_type)){
         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                 weight_op, weight_avg,
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
-                pixel_shift);
+                pixel_shift, chroma444);
     }else if(IS_16X8(mb_type)){
         mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
-                pixel_shift);
+                pixel_shift, chroma444);
         mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
-                pixel_shift);
+                pixel_shift, chroma444);
     }else if(IS_8X16(mb_type)){
         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
-                pixel_shift);
+                pixel_shift, chroma444);
         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
-                pixel_shift);
+                pixel_shift, chroma444);
     }else{
         int i;
 
@@ -702,29 +745,29 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                     &weight_op[3], &weight_avg[3],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift);
+                    pixel_shift, chroma444);
             }else if(IS_SUB_8X4(sub_mb_type)){
                 mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift);
+                    pixel_shift, chroma444);
                 mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift);
+                    pixel_shift, chroma444);
             }else if(IS_SUB_4X8(sub_mb_type)){
                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift);
+                    pixel_shift, chroma444);
                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                    pixel_shift);
+                    pixel_shift, chroma444);
             }else{
                 int j;
                 assert(IS_SUB_4X4(sub_mb_type));
@@ -735,13 +778,13 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                         &weight_op[6], &weight_avg[6],
                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
-                        pixel_shift);
+                        pixel_shift, chroma444);
                 }
             }
         }
     }
 
-    prefetch_motion(h, 1, pixel_shift);
+    prefetch_motion(h, 1, pixel_shift, chroma444);
 }
 
 #define hl_motion_fn(sh, bits) \
@@ -753,10 +796,11 @@ static av_always_inline void hl_motion_ ## bits(H264Context *h, \
                                        qpel_mc_func (*qpix_avg)[16], \
                                        h264_chroma_mc_func (*chroma_avg), \
                                        h264_weight_func *weight_op, \
-                                       h264_biweight_func *weight_avg) \
+                                       h264_biweight_func *weight_avg, \
+                                       int chroma444) \
 { \
     hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
-              qpix_avg, chroma_avg, weight_op, weight_avg, sh); \
+              qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
 }
 hl_motion_fn(0, 8);
 hl_motion_fn(1, 16);
@@ -796,16 +840,19 @@ static void free_tables(H264Context *h, int free_rbsp){
 }
 
 static void init_dequant8_coeff_table(H264Context *h){
-    int i,q,x;
+    int i,j,q,x;
     const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
-    h->dequant8_coeff[0] = h->dequant8_buffer[0];
-    h->dequant8_coeff[1] = h->dequant8_buffer[1];
 
-    for(i=0; i<2; i++ ){
-        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
-            h->dequant8_coeff[1] = h->dequant8_buffer[0];
-            break;
+    for(i=0; i<6; i++ ){
+        h->dequant8_coeff[i] = h->dequant8_buffer[i];
+        for(j=0; j<i; j++){
+            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
+                h->dequant8_coeff[i] = h->dequant8_buffer[j];
+                break;
+            }
         }
+        if(j<i)
+            continue;
 
         for(q=0; q<max_qp+1; q++){
             int shift = div6[q];
@@ -853,7 +900,7 @@ static void init_dequant_tables(H264Context *h){
             for(x=0; x<16; x++)
                 h->dequant4_coeff[i][0][x] = 1<<6;
         if(h->pps.transform_8x8_mode)
-            for(i=0; i<2; i++)
+            for(i=0; i<6; i++)
                 for(x=0; x<64; x++)
                     h->dequant8_coeff[i][0][x] = 1<<6;
     }
@@ -868,7 +915,7 @@ int ff_h264_alloc_tables(H264Context *h){
 
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)
 
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 32 * sizeof(uint8_t), fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
 
@@ -930,8 +977,8 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
  * Allocate buffers which are not shared amongst multiple threads.
  */
 static int context_init(H264Context *h){
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
 
     h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
     h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
@@ -1130,9 +1177,10 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex
 
         // frame_start may not be called for the next thread (if it's decoding a bottom field)
         // so this has to be allocated here
-        h->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
+        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);
 
         s->dsp.clear_blocks(h->mb);
+        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
     }
 
     //extradata/NAL handling
@@ -1151,7 +1199,7 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex
     for(i=0; i<6; i++)
         h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);
 
-    for(i=0; i<2; i++)
+    for(i=0; i<6; i++)
         h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);
 
     h->dequant_coeff_pps = h1->dequant_coeff_pps;
@@ -1206,20 +1254,20 @@ int ff_h264_frame_start(H264Context *h){
 
     for(i=0; i<16; i++){
         h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
-        h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
     }
-    for(i=0; i<4; i++){
+    for(i=0; i<16; i++){
         h->block_offset[16+i]=
-        h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
-        h->block_offset[24+16+i]=
-        h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[48+16+i]=
+        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
     }
 
     /* can't be in alloc_tables because linesize isn't known there.
      * FIXME: redo bipred weight to not require extra buffer? */
     for(i = 0; i < thread_count; i++)
         if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
-            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
+            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);
 
     /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
     memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
@@ -1404,7 +1452,7 @@ static void decode_postinit(H264Context *h, int setup_finished){
         ff_thread_finish_setup(s->avctx);
 }
 
-static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
+static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
     MpegEncContext * const s = &h->s;
     uint8_t *top_border;
     int top_idx = 1;
@@ -1422,12 +1470,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
                 if (pixel_shift)
                     AV_COPY128(top_border+16, src_y+15*linesize+16);
                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-                    if (pixel_shift) {
-                        AV_COPY128(top_border+32, src_cb+7*uvlinesize);
-                        AV_COPY128(top_border+48, src_cr+7*uvlinesize);
+                    if(chroma444){
+                        if (pixel_shift){
+                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
+                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
+                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
+                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
+                        } else {
+                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
+                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
+                        }
                     } else {
-                    AV_COPY64(top_border+16, src_cb+7*uvlinesize);
-                    AV_COPY64(top_border+24, src_cr+7*uvlinesize);
+                        if (pixel_shift) {
+                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
+                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
+                        } else {
+                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
+                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
+                        }
                     }
                 }
             }
@@ -1445,12 +1505,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
         AV_COPY128(top_border+16, src_y+16*linesize+16);
 
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-        if (pixel_shift) {
-            AV_COPY128(top_border+32, src_cb+8*uvlinesize);
-            AV_COPY128(top_border+48, src_cr+8*uvlinesize);
+        if(chroma444){
+            if (pixel_shift){
+                AV_COPY128(top_border+32, src_cb + 16*linesize);
+                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
+                AV_COPY128(top_border+64, src_cr + 16*linesize);
+                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
+            } else {
+                AV_COPY128(top_border+16, src_cb + 16*linesize);
+                AV_COPY128(top_border+32, src_cr + 16*linesize);
+            }
         } else {
-        AV_COPY64(top_border+16, src_cb+8*uvlinesize);
-        AV_COPY64(top_border+24, src_cr+8*uvlinesize);
+            if (pixel_shift) {
+                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
+                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
+            } else {
+                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
+                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
+            }
         }
     }
 }
@@ -1458,7 +1530,8 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                   uint8_t *src_cb, uint8_t *src_cr,
                                   int linesize, int uvlinesize,
-                                  int xchg, int simple, int pixel_shift){
+                                  int xchg, int chroma444,
+                                  int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
     int deblock_topleft;
     int deblock_top;
@@ -1513,13 +1586,28 @@ else      AV_COPY64(b,a);
         }
     }
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-        if(deblock_top){
+        if(chroma444){
             if(deblock_topleft){
-                XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
-                XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
+                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
+                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
+            }
+            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
+            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
+            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
+            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
+            if(s->mb_x+1 < s->mb_width){
+                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
+                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
+            }
+        } else {
+            if(deblock_top){
+                if(deblock_topleft){
+                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
+                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
+                }
+                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
+                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
             }
-            XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
-            XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
         }
     }
 }
@@ -1538,6 +1626,159 @@ static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int in
         AV_WN16A(mb + index, value);
 }
 
+static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
+                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
+{ /* Intra-predicts plane p into dest_y; IS_INTRA4x4 blocks also get their residual added here, other types get pred16x16 plus DC coefficient setup only. */
+    MpegEncContext * const s = &h->s;
+    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+    int i;
+    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1]; /* plane 0 uses s->qscale, planes 1/2 use chroma_qp[0]/[1] */
+    block_offset += 16*p; /* plane p indexes block_offset entries 16*p .. 16*p+15 */
+    if(IS_INTRA4x4(mb_type)){
+        if(simple || !s->encoding){
+            if(IS_8x8DCT(mb_type)){
+                if(transform_bypass){ /* bypass: both pointers use the plain dsp.add_pixels8 routine */
+                    idct_dc_add =
+                    idct_add    = s->dsp.add_pixels8;
+                }else{
+                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
+                    idct_add    = h->h264dsp.h264_idct8_add;
+                }
+                for(i=0; i<16; i+=4){ /* one iteration per 8x8 block; i is the index of its first 4x4 block */
+                    uint8_t * const ptr= dest_y + block_offset[i];
+                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; /* prediction mode is looked up without a plane offset */
+                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ /* modes 0/1: a single pred8x8l_add call is given the coefficients directly */
+                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                    }else{
+                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
+                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
+                                                    (h->topright_samples_available<<i)&0x4000, linesize);
+                        if(nnz){
+                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256)) /* single coefficient and it is the block's first one: DC-only routine */
+                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                            else
+                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                        }
+                    }
+                }
+            }else{
+                if(transform_bypass){ /* bypass: both pointers use the plain dsp.add_pixels4 routine */
+                    idct_dc_add =
+                    idct_add    = s->dsp.add_pixels4;
+                }else{
+                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
+                    idct_add    = h->h264dsp.h264_idct_add;
+                }
+                for(i=0; i<16; i++){ /* one iteration per 4x4 block of the plane */
+                    uint8_t * const ptr= dest_y + block_offset[i];
+                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+
+                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ /* modes 0/1: a single pred4x4_add call is given the coefficients directly */
+                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                    }else{
+                        uint8_t *topright;
+                        int nnz, tr;
+                        uint64_t tr_high;
+                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ /* only these two modes are handed a top-right pointer */
+                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
+                            assert(s->mb_y || linesize <= block_offset[i]); /* mb_y is not a local of this helper; read it from the context */
+                            if(!topright_avail){ /* no top-right samples: replicate the sample at index 3 of the row above four times */
+                                if (pixel_shift) {
+                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
+                                    topright= (uint8_t*) &tr_high;
+                                } else {
+                                    tr= ptr[3 - linesize]*0x01010101;
+                                    topright= (uint8_t*) &tr;
+                                }
+                            }else
+                                topright= ptr + (4 << pixel_shift) - linesize;
+                        }else
+                            topright= NULL;
+
+                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
+                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
+                        if(nnz){
+                            if(is_h264){
+                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256)) /* single coefficient and it is the block's first one: DC-only routine */
+                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                                else
+                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
+                            }else
+                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0); /* non-H.264 path: offsets are not scaled by pixel_shift */
+                        }
+                    }
+                }
+            }
+        }
+    }else{ /* every type other than IS_INTRA4x4: predict the whole 16x16 plane, then set up its DC coefficients */
+        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
+        if(is_h264){
+            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
+                if(!transform_bypass)
+                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
+                else{ /* bypass: copy the 16 DC values into the first coefficient slot of each block, reordered by dc_mapping */
+                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
+                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
+                    for(i = 0; i < 16; i++)
+                        dctcoef_set(h->mb+(p*256 << pixel_shift), pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i)); /* plane base scaled by pixel_shift, as in the dequant call above */
+                }
+            }
+        }else
+            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
+    }
+}
+
+static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
+                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
+{ /* Adds the residual of plane p to dest_y for every macroblock type except IS_INTRA4x4, for which this function does nothing. */
+    MpegEncContext * const s = &h->s;
+    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
+    int i;
+    block_offset += 16*p; /* plane p indexes block_offset entries 16*p .. 16*p+15 */
+    if(!IS_INTRA4x4(mb_type)){
+        if(is_h264){
+            if(IS_INTRA16x16(mb_type)){
+                if(transform_bypass){
+                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ /* these two modes: one pred16x16_add call is given the plane's coefficients */
+                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
+                    }else{
+                        for(i=0; i<16; i++){
+                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256)) /* test the first coefficient of this plane's block, the same block add_pixels4 reads below */
+                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
+                        }
+                    }
+                }else{
+                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8); /* nnz cache pointer advances 5*8 entries per plane */
+                }
+            }else if(h->cbp&15){ /* remaining types: only when the low four cbp bits are set */
+                if(transform_bypass){
+                    const int di = IS_8x8DCT(mb_type) ? 4 : 1; /* step 4 block indices per 8x8 block, 1 per 4x4 block */
+                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
+                    for(i=0; i<16; i+=di){
+                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
+                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
+                        }
+                    }
+                }else{
+                    if(IS_8x8DCT(mb_type)){
+                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
+                    }else{
+                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
+                    }
+                }
+            }
+        }else{ /* non-H.264 path: ff_svq3_add_idct_c per block, offsets not scaled by pixel_shift, always s->qscale */
+            for(i=0; i<16; i++){
+                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
+                    uint8_t * const ptr= dest_y + block_offset[i];
+                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
+                }
+            }
+        }
+    }
+}
+
 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mb_x= s->mb_x;
@@ -1546,13 +1787,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
     const int mb_type= s->current_picture.mb_type[mb_xy];
     uint8_t  *dest_y, *dest_cb, *dest_cr;
     int linesize, uvlinesize /*dct_offset*/;
-    int i;
+    int i, j;
     int *block_offset = &h->block_offset[0];
     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
     /* is_h264 should always be true if SVQ3 is disabled. */
     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
-    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
 
     dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
     dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
@@ -1566,7 +1806,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
     if (!simple && MB_FIELD) {
         linesize   = h->mb_linesize   = s->linesize * 2;
         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
-        block_offset = &h->block_offset[24];
+        block_offset = &h->block_offset[48];
         if(mb_y&1){ //FIXME move out of this function?
             dest_y -= s->linesize*15;
             dest_cb-= s->uvlinesize*7;
@@ -1629,194 +1869,67 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
     } else {
         if(IS_INTRA(mb_type)){
             if(h->deblocking_filter)
-                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift);
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);
 
             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
             }
 
-            if(IS_INTRA4x4(mb_type)){
-                if(simple || !s->encoding){
-                    if(IS_8x8DCT(mb_type)){
-                        if(transform_bypass){
-                            idct_dc_add =
-                            idct_add    = s->dsp.add_pixels8;
-                        }else{
-                            idct_dc_add = h->h264dsp.h264_idct8_dc_add;
-                            idct_add    = h->h264dsp.h264_idct8_add;
-                        }
-                        for(i=0; i<16; i+=4){
-                            uint8_t * const ptr= dest_y + block_offset[i];
-                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
-                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
-                                h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
-                            }else{
-                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
-                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
-                                                            (h->topright_samples_available<<i)&0x4000, linesize);
-                                if(nnz){
-                                    if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
-                                        idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
-                                    else
-                                        idct_add   (ptr, h->mb + (i*16 << pixel_shift), linesize);
-                                }
-                            }
-                        }
-                    }else{
-                        if(transform_bypass){
-                            idct_dc_add =
-                            idct_add    = s->dsp.add_pixels4;
-                        }else{
-                            idct_dc_add = h->h264dsp.h264_idct_dc_add;
-                            idct_add    = h->h264dsp.h264_idct_add;
-                        }
-                        for(i=0; i<16; i++){
-                            uint8_t * const ptr= dest_y + block_offset[i];
-                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
 
-                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
-                                h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
-                            }else{
-                                uint8_t *topright;
-                                int nnz, tr;
-                                uint64_t tr_high;
-                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
-                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
-                                    assert(mb_y || linesize <= block_offset[i]);
-                                    if(!topright_avail){
-                                        if (pixel_shift) {
-                                            tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
-                                            topright= (uint8_t*) &tr_high;
-                                        } else {
-                                        tr= ptr[3 - linesize]*0x01010101;
-                                        topright= (uint8_t*) &tr;
-                                        }
-                                    }else
-                                        topright= ptr + (4 << pixel_shift) - linesize;
-                                }else
-                                    topright= NULL;
-
-                                h->hpc.pred4x4[ dir ](ptr, topright, linesize);
-                                nnz = h->non_zero_count_cache[ scan8[i] ];
-                                if(nnz){
-                                    if(is_h264){
-                                        if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
-                                            idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
-                                        else
-                                            idct_add   (ptr, h->mb + (i*16 << pixel_shift), linesize);
-                                    }else
-                                        ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
-                                }
-                            }
-                        }
-                    }
-                }
-            }else{
-                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
-                if(is_h264){
-                    if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
-                        if(!transform_bypass)
-                            h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]);
-                        else{
-                            static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
-                                                                    8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
-                            for(i = 0; i < 16; i++)
-                                dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i));
-                        }
-                    }
-                }else
-                    ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
-            }
             if(h->deblocking_filter)
-                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
         }else if(is_h264){
             if (pixel_shift) {
                 hl_motion_16(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
-                             h->h264dsp.biweight_h264_pixels_tab);
+                             h->h264dsp.biweight_h264_pixels_tab, 0);
             } else
                 hl_motion_8(h, dest_y, dest_cb, dest_cr,
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
-                            h->h264dsp.biweight_h264_pixels_tab);
+                            h->h264dsp.biweight_h264_pixels_tab, 0);
         }
 
-
-        if(!IS_INTRA4x4(mb_type)){
-            if(is_h264){
-                if(IS_INTRA16x16(mb_type)){
-                    if(transform_bypass){
-                        if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
-                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
-                        }else{
-                            for(i=0; i<16; i++){
-                                if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
-                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
-                            }
-                        }
-                    }else{
-                         h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
-                    }
-                }else if(h->cbp&15){
-                    if(transform_bypass){
-                        const int di = IS_8x8DCT(mb_type) ? 4 : 1;
-                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
-                        for(i=0; i<16; i+=di){
-                            if(h->non_zero_count_cache[ scan8[i] ]){
-                                idct_add(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
-                            }
-                        }
-                    }else{
-                        if(IS_8x8DCT(mb_type)){
-                            h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
-                        }else{
-                            h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
-                        }
-                    }
-                }
-            }else{
-                for(i=0; i<16; i++){
-                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
-                        uint8_t * const ptr= dest_y + block_offset[i];
-                        ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
-                    }
-                }
-            }
-        }
+        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
 
         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
             uint8_t *dest[2] = {dest_cb, dest_cr};
             if(transform_bypass){
                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
-                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize);
-                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize);
+                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
+                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
                 }else{
                     idct_add = s->dsp.add_pixels4;
-                    for(i=16; i<16+8; i++){
-                        if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
-                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
+                    for(j=1; j<3; j++){
+                        for(i=j*16; i<j*16+4; i++){
+                            if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
+                                idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
+                        }
                     }
                 }
             }else{
                 if(is_h264){
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift)       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     h->h264dsp.h264_idct_add8(dest, block_offset,
                                               h->mb, uvlinesize,
                                               h->non_zero_count_cache);
                 }else{
-                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16     , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
-                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
-                    for(i=16; i<16+8; i++){
-                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                            uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
-                            ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
+                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                    for(j=1; j<3; j++){
+                        for(i=j*16; i<j*16+4; i++){
+                            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                                uint8_t * const ptr= dest[j-1] + block_offset[i];
+                                ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
+                            }
                         }
                     }
                 }
@@ -1824,7 +1937,112 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
         }
     }
     if(h->cbp || IS_INTRA(mb_type))
+    {
         s->dsp.clear_blocks(h->mb);
+        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
+    }
+}
+
+static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){ /* Reconstructs the current macroblock when all three planes are 16x16 and share s->linesize: each plane goes through the luma predict/idct helpers. */
+    MpegEncContext * const s = &h->s;
+    const int mb_x= s->mb_x;
+    const int mb_y= s->mb_y;
+    const int mb_xy= h->mb_xy;
+    const int mb_type= s->current_picture.mb_type[mb_xy];
+    uint8_t  *dest[3]; /* top-left sample of this macroblock in planes 0..2 */
+    int linesize;
+    int i, j, p;
+    int *block_offset = &h->block_offset[0];
+    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass); /* never set on the simple path */
+
+    for (p = 0; p < 3; p++)
+    {
+        dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16; /* same 16x16 addressing and stride for every plane */
+        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
+    }
+
+    h->list_counts[mb_xy]= h->list_count;
+
+    if (!simple && MB_FIELD) { /* field macroblock: doubled stride and the block_offset table starting at entry 48 */
+        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
+        block_offset = &h->block_offset[48];
+        if(mb_y&1) //FIXME move out of this function?
+            for (p = 0; p < 3; p++)
+                dest[p] -= s->linesize*15; /* odd mb_y: move the start back by 15 frame lines */
+        if(FRAME_MBAFF) {
+            int list;
+            for(list=0; list<h->list_count; list++){
+                if(!USES_LIST(mb_type, list))
+                    continue;
+                if(IS_16X16(mb_type)){
+                    int8_t *ref = &h->ref_cache[list][scan8[0]];
+                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1); /* rewrite the cached ref as (16+ref) with its low bit flipped on odd mb_y */
+                }else{
+                    for(i=0; i<16; i+=4){
+                        int ref = h->ref_cache[list][scan8[i]];
+                        if(ref >= 0) /* negative entries are left untouched */
+                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
+                    }
+                }
+            }
+        }
+    } else {
+        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize;
+    }
+
+    if (!simple && IS_INTRA_PCM(mb_type)) { /* PCM: samples are copied from h->mb straight into the picture */
+        if (pixel_shift) {
+            const int bit_depth = h->sps.bit_depth_luma; /* the luma bit depth is used for all three planes */
+            GetBitContext gb;
+            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth); /* 768 = 3 planes * 16 * 16 samples, bit_depth bits each */
+
+            for (p = 0; p < 3; p++) {
+                for (i = 0; i < 16; i++) {
+                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
+                    for (j = 0; j < 16; j++)
+                        tmp[j] = get_bits(&gb, bit_depth);
+                }
+            }
+        } else {
+            for (p = 0; p < 3; p++) {
+                for (i = 0; i < 16; i++) {
+                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16); /* offsets are in DCTELEM units; presumably 256 bytes per plane and 16 per row with a 16-bit DCTELEM - TODO confirm */
+                }
+            }
+        }
+    } else {
+        if(IS_INTRA(mb_type)){
+            if(h->deblocking_filter)
+                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift); /* border exchange before prediction, with the 4:4:4 flag argument set to 1 */
+
+            for (p = 0; p < 3; p++)
+                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p); /* is_h264 is hard-wired to 1 here */
+
+            if(h->deblocking_filter)
+                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift); /* matching exchange after prediction */
+        }else{
+            if (pixel_shift) { /* inter macroblock: motion compensation, hl_motion_16 or hl_motion_8 chosen by pixel_shift, last argument 1 */
+                hl_motion_16(h, dest[0], dest[1], dest[2],
+                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                             h->h264dsp.weight_h264_pixels_tab,
+                             h->h264dsp.biweight_h264_pixels_tab, 1);
+            } else
+                hl_motion_8(h, dest[0], dest[1], dest[2],
+                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                            h->h264dsp.weight_h264_pixels_tab,
+                            h->h264dsp.biweight_h264_pixels_tab, 1);
+        }
+
+        for (p = 0; p < 3; p++)
+            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p); /* residual for each plane; runs for intra and inter alike */
+    }
+    if(h->cbp || IS_INTRA(mb_type))
+    {
+        s->dsp.clear_blocks(h->mb); /* two clear_blocks calls: at h->mb and at block 24 (offset scaled by pixel_shift) */
+        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
+    }
 }
 
 /**
@@ -1844,13 +2062,26 @@ static void av_noinline hl_decode_mb_complex(H264Context *h){
     hl_decode_mb_internal(h, 0, h->pixel_shift);
 }
 
+static void av_noinline hl_decode_mb_444_complex(H264Context *h){ /* Out-of-line instance of hl_decode_mb_444_internal with simple=0 and the context's pixel_shift. */
+    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
+}
+
+static void av_noinline hl_decode_mb_444_simple(H264Context *h){ /* Out-of-line instance of hl_decode_mb_444_internal with simple=1 and pixel_shift fixed to 0. */
+    hl_decode_mb_444_internal(h, 1, 0);
+}
+
 void ff_h264_hl_decode_mb(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
 
-    if (is_complex) {
+    if (CHROMA444) {
+        if(is_complex || h->pixel_shift)
+            hl_decode_mb_444_complex(h);
+        else
+            hl_decode_mb_444_simple(h);
+    } else if (is_complex) {
         hl_decode_mb_complex(h);
     } else if (h->pixel_shift) {
         hl_decode_mb_simple_16(h);
@@ -1866,7 +2097,7 @@ static int pred_weight_table(H264Context *h){
     h->use_weight= 0;
     h->use_weight_chroma= 0;
     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
-    if(CHROMA)
+    if(h->sps.chroma_format_idc)
         h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
     luma_def = 1<<h->luma_log2_weight_denom;
     chroma_def = 1<<h->chroma_log2_weight_denom;
@@ -1891,7 +2122,7 @@ static int pred_weight_table(H264Context *h){
                 h->luma_weight[i][list][1]= 0;
             }
 
-            if(CHROMA){
+            if(h->sps.chroma_format_idc){
                 chroma_weight_flag= get_bits1(&s->gb);
                 if(chroma_weight_flag){
                     int j;
@@ -2321,11 +2552,11 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
 
     h->b_stride=  s->mb_width*4;
 
-    s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
+    s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
     if(h->sps.frame_mbs_only_flag)
-        s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
+        s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
     else
-        s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 7);
+        s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
 
     if (s->context_initialized
         && (   s->width != s->avctx->width || s->height != s->avctx->height
@@ -2370,18 +2601,22 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
 
         switch (h->sps.bit_depth_luma) {
             case 9 :
-                s->avctx->pix_fmt = PIX_FMT_YUV420P9;
+                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
                 break;
             case 10 :
-                s->avctx->pix_fmt = PIX_FMT_YUV420P10;
+                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
                 break;
             default:
-        s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
-                                                 s->avctx->codec->pix_fmts ?
-                                                 s->avctx->codec->pix_fmts :
-                                                 s->avctx->color_range == AVCOL_RANGE_JPEG ?
-                                                 hwaccel_pixfmt_list_h264_jpeg_420 :
-                                                 ff_hwaccel_pixfmt_list_420);
+                if (CHROMA444){
+                    s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
+                }else{
+                    s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
+                                                             s->avctx->codec->pix_fmts ?
+                                                             s->avctx->codec->pix_fmts :
+                                                             s->avctx->color_range == AVCOL_RANGE_JPEG ?
+                                                             hwaccel_pixfmt_list_h264_jpeg_420 :
+                                                             ff_hwaccel_pixfmt_list_420);
+                }
         }
 
         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
@@ -2873,11 +3108,10 @@ static int fill_filter_caches(H264Context *h, int mb_type){
     if(IS_INTRA(mb_type))
         return 0;
 
-    AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
-    AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
-    AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
-    AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
-    AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
+    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
+    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
+    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
+    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);
 
     h->cbp= h->cbp_table[mb_xy];
 
@@ -2929,45 +3163,45 @@ static int fill_filter_caches(H264Context *h, int mb_type){
 */
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
-        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
+        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
     }
 
     if(left_type[0]){
-        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
-        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
-        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
-        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
+        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
+        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
+        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
+        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
     }
 
     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
     if(!CABAC && h->pps.transform_8x8_mode){
         if(IS_8x8DCT(top_type)){
             h->non_zero_count_cache[4+8*0]=
-            h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
+            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
             h->non_zero_count_cache[6+8*0]=
-            h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
+            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
         }
         if(IS_8x8DCT(left_type[0])){
             h->non_zero_count_cache[3+8*1]=
-            h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
+            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
         }
         if(IS_8x8DCT(left_type[1])){
             h->non_zero_count_cache[3+8*3]=
-            h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
+            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
         }
 
         if(IS_8x8DCT(mb_type)){
             h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
-            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
+            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
 
             h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
-            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
+            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
 
             h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
-            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
+            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
 
             h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
-            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
+            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
         }
     }
 
@@ -3041,8 +3275,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
                 s->mb_x= mb_x;
                 s->mb_y= mb_y;
                 dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
-                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
-                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
+                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
+                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                     //FIXME simplify above
 
                 if (MB_FIELD) {
@@ -3057,7 +3291,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
                     linesize   = h->mb_linesize   = s->linesize;
                     uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                 }
-                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
+                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
                 if(fill_filter_caches(h, mb_type))
                     continue;
                 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 8c4f1ab21a..3abf895010 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -39,9 +39,6 @@
 #define interlaced_dct interlaced_dct_is_a_bad_name
 #define mb_intra mb_intra_is_not_initialized_see_mb_type
 
-#define LUMA_DC_BLOCK_INDEX   24
-#define CHROMA_DC_BLOCK_INDEX 25
-
 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
 #define COEFF_TOKEN_VLC_BITS           8
 #define TOTAL_ZEROS_VLC_BITS           9
@@ -60,8 +57,6 @@
  * of progressive decoding by about 2%. */
 #define ALLOW_INTERLACE
 
-#define ALLOW_NOCHROMA
-
 #define FMO 0
 
 /**
@@ -85,16 +80,12 @@
 #endif
 #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
 
-#ifdef ALLOW_NOCHROMA
-#define CHROMA h->sps.chroma_format_idc
-#else
-#define CHROMA 1
-#endif
-
 #ifndef CABAC
 #define CABAC h->pps.cabac
 #endif
 
+#define CHROMA444 (h->sps.chroma_format_idc == 3)
+
 #define EXTENDED_SAR          255
 
 #define MB_TYPE_REF0       MB_TYPE_ACPRED //dirty but it fits in 16 bit
@@ -198,7 +189,7 @@ typedef struct SPS{
     int num_reorder_frames;
     int scaling_matrix_present;
     uint8_t scaling_matrix4[6][16];
-    uint8_t scaling_matrix8[2][64];
+    uint8_t scaling_matrix8[6][64];
     int nal_hrd_parameters_present_flag;
     int vcl_hrd_parameters_present_flag;
     int pic_struct_present_flag;
@@ -233,7 +224,7 @@ typedef struct PPS{
     int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
     int transform_8x8_mode;     ///< transform_8x8_mode_flag
     uint8_t scaling_matrix4[6][16];
-    uint8_t scaling_matrix8[2][64];
+    uint8_t scaling_matrix8[6][64];
     uint8_t chroma_qp_table[2][64];  ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
     int chroma_qp_diff;
 }PPS;
@@ -298,21 +289,15 @@ typedef struct H264Context{
     unsigned int top_samples_available;
     unsigned int topright_samples_available;
     unsigned int left_samples_available;
-    uint8_t (*top_borders[2])[(16+2*8)*2];
+    uint8_t (*top_borders[2])[(16*3)*2];
 
     /**
      * non zero coeff count cache.
      * is 64 if not available.
      */
-    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8];
+    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15*8];
 
-    /*
-    .UU.YYYY
-    .UU.YYYY
-    .vv.YYYY
-    .VV.YYYY
-    */
-    uint8_t (*non_zero_count)[32];
+    uint8_t (*non_zero_count)[48];
 
     /**
      * Motion vector cache.
@@ -336,7 +321,7 @@ typedef struct H264Context{
      * block_offset[ 0..23] for frame macroblocks
      * block_offset[24..47] for field macroblocks
      */
-    int block_offset[2*(16+8)];
+    int block_offset[2*(16*3)];
 
     uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
     uint32_t *mb2br_xy;
@@ -356,9 +341,9 @@ typedef struct H264Context{
     PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
 
     uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down?
-    uint32_t dequant8_buffer[2][QP_MAX_NUM+1][64];
+    uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64];
     uint32_t (*dequant4_coeff[6])[16];
-    uint32_t (*dequant8_coeff[2])[64];
+    uint32_t (*dequant8_coeff[6])[64];
 
     int slice_num;
     uint16_t *slice_table;     ///< slice_table_base + 2*mb_stride + 1
@@ -408,15 +393,15 @@ typedef struct H264Context{
     GetBitContext *intra_gb_ptr;
     GetBitContext *inter_gb_ptr;
 
-    DECLARE_ALIGNED(16, DCTELEM, mb)[16*24*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
-    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16*2];
+    DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
+    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2];
     DCTELEM mb_padding[256*2];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
 
     /**
      * Cabac
      */
     CABACContext cabac;
-    uint8_t      cabac_state[460];
+    uint8_t      cabac_state[1024];
 
     /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
     uint16_t     *cbp_table;
@@ -721,27 +706,43 @@ o-o o-o
 */
 
 /* Scan8 organization:
- *   0 1 2 3 4 5 6 7
- * 0   u u y y y y y
- * 1 u U U y Y Y Y Y
- * 2 u U U y Y Y Y Y
- * 3   v v y Y Y Y Y
- * 4 v V V y Y Y Y Y
- * 5 v V V   DYDUDV
+ *    0 1 2 3 4 5 6 7
+ * 0  DY    y y y y y
+ * 1        y Y Y Y Y
+ * 2        y Y Y Y Y
+ * 3        y Y Y Y Y
+ * 4        y Y Y Y Y
+ * 5  DU    u u u u u
+ * 6        u U U U U
+ * 7        u U U U U
+ * 8        u U U U U
+ * 9        u U U U U
+ * 10 DV    v v v v v
+ * 11       v V V V V
+ * 12       v V V V V
+ * 13       v V V V V
+ * 14       v V V V V
  * DY/DU/DV are for luma/chroma DC.
  */
 
+#define LUMA_DC_BLOCK_INDEX   48
+#define CHROMA_DC_BLOCK_INDEX 49
+
 //This table must be here because scan8[constant] must be known at compiletime
-static const uint8_t scan8[16 + 2*4 + 3]={
- 4+1*8, 5+1*8, 4+2*8, 5+2*8,
- 6+1*8, 7+1*8, 6+2*8, 7+2*8,
- 4+3*8, 5+3*8, 4+4*8, 5+4*8,
- 6+3*8, 7+3*8, 6+4*8, 7+4*8,
- 1+1*8, 2+1*8,
- 1+2*8, 2+2*8,
- 1+4*8, 2+4*8,
- 1+5*8, 2+5*8,
- 4+5*8, 5+5*8, 6+5*8
+static const uint8_t scan8[16*3 + 3]={
+ 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
+ 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
+ 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
+ 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
+ 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
+ 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
+ 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
+ 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
+ 4+11*8, 5+11*8, 4+12*8, 5+12*8,
+ 6+11*8, 7+11*8, 6+12*8, 7+12*8,
+ 4+13*8, 5+13*8, 4+14*8, 5+14*8,
+ 6+13*8, 7+13*8, 6+14*8, 7+14*8,
+ 0+ 0*8, 0+ 5*8, 0+10*8
 };
 
 static av_always_inline uint32_t pack16to32(int a, int b){
@@ -773,11 +774,11 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     int topleft_xy, top_xy, topright_xy, left_xy[2];
-    static const uint8_t left_block_options[4][16]={
-        {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8},
-        {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},
-        {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8},
-        {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
+    static const uint8_t left_block_options[4][32]={
+        {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4},
+        {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4},
+        {0,0,1,1,7,10,7,10,3+0*4, 3+0*4, 3+1*4, 3+1*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4},
+        {0,2,0,2,7,10,7,10,3+0*4, 3+2*4, 3+0*4, 3+2*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4}
     };
 
     h->topleft_partition= -1;
@@ -947,32 +948,41 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 */
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
-        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
-            h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
-            h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];
-
-            h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8];
-            h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8];
-    }else {
-            h->non_zero_count_cache[1+8*0]=
-            h->non_zero_count_cache[2+8*0]=
-
-            h->non_zero_count_cache[1+8*3]=
-            h->non_zero_count_cache[2+8*3]=
-            AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040);
+        AV_COPY32(&h->non_zero_count_cache[4+8* 0], &h->non_zero_count[top_xy][4*3]);
+        if(CHROMA444){
+            AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 7]);
+            AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4*11]);
+        }else{
+            AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 5]);
+            AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4* 9]);
+        }
+    }else{
+        uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
+        AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
+        AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
+        AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
     }
 
     for (i=0; i<2; i++) {
         if(left_type[i]){
-            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
-            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
-                h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
-                h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
+            h->non_zero_count_cache[3+8* 1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
+            h->non_zero_count_cache[3+8* 2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
+            if(CHROMA444){
+                h->non_zero_count_cache[3+8* 6 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+4*4];
+                h->non_zero_count_cache[3+8* 7 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+4*4];
+                h->non_zero_count_cache[3+8*11 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+8*4];
+                h->non_zero_count_cache[3+8*12 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+8*4];
+            }else{
+                h->non_zero_count_cache[3+8* 6 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
+                h->non_zero_count_cache[3+8*11 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
+            }
         }else{
-                h->non_zero_count_cache[3+8*1 + 2*8*i]=
-                h->non_zero_count_cache[3+8*2 + 2*8*i]=
-                h->non_zero_count_cache[0+8*1 +   8*i]=
-                h->non_zero_count_cache[0+8*4 +   8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
+            h->non_zero_count_cache[3+8* 1 + 2*8*i]=
+            h->non_zero_count_cache[3+8* 2 + 2*8*i]=
+            h->non_zero_count_cache[3+8* 6 + 2*8*i]=
+            h->non_zero_count_cache[3+8* 7 + 2*8*i]=
+            h->non_zero_count_cache[3+8*11 + 2*8*i]=
+            h->non_zero_count_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
         }
     }
 
@@ -981,15 +991,15 @@ static void fill_decode_caches(H264Context *h, int mb_type){
         if(top_type) {
             h->top_cbp = h->cbp_table[top_xy];
         } else {
-            h->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
+            h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
         }
         // left_cbp
         if (left_type[0]) {
-            h->left_cbp = (h->cbp_table[left_xy[0]] & 0x1f0)
+            h->left_cbp =   (h->cbp_table[left_xy[0]] & 0x7F0)
                         |  ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2)
                         | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2);
         } else {
-            h->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
+            h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
         }
     }
     }
@@ -1190,11 +1200,21 @@ static inline int pred_intra_mode(H264Context *h, int n){
 static inline void write_back_non_zero_count(H264Context *h){
     const int mb_xy= h->mb_xy;
 
-    AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]);
-    AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]);
-    AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]);
-    AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]);
-    AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]);
+    AV_COPY32(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[4+8* 1]);
+    AV_COPY32(&h->non_zero_count[mb_xy][ 4], &h->non_zero_count_cache[4+8* 2]);
+    AV_COPY32(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[4+8* 3]);
+    AV_COPY32(&h->non_zero_count[mb_xy][12], &h->non_zero_count_cache[4+8* 4]);
+    AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[4+8* 6]);
+    AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8* 7]);
+    AV_COPY32(&h->non_zero_count[mb_xy][32], &h->non_zero_count_cache[4+8*11]);
+    AV_COPY32(&h->non_zero_count[mb_xy][36], &h->non_zero_count_cache[4+8*12]);
+
+    if(CHROMA444){
+        AV_COPY32(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[4+8* 8]);
+        AV_COPY32(&h->non_zero_count[mb_xy][28], &h->non_zero_count_cache[4+8* 9]);
+        AV_COPY32(&h->non_zero_count[mb_xy][40], &h->non_zero_count_cache[4+8*13]);
+        AV_COPY32(&h->non_zero_count[mb_xy][44], &h->non_zero_count_cache[4+8*14]);
+    }
 }
 
 static inline void write_back_motion(H264Context *h, int mb_type){
@@ -1267,8 +1287,7 @@ static void av_unused decode_mb_skip(H264Context *h){
     const int mb_xy= h->mb_xy;
     int mb_type=0;
 
-    memset(h->non_zero_count[mb_xy], 0, 32);
-    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
+    memset(h->non_zero_count[mb_xy], 0, 48);
 
     if(MB_FIELD)
         mb_type|= MB_TYPE_INTERLACED;
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 69af1e2ded..f30f4e1c9c 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -45,7 +45,7 @@
 
 /* Cabac pre state table */
 
-static const int8_t cabac_context_init_I[460][2] =
+static const int8_t cabac_context_init_I[1024][2] =
 {
     /* 0 - 10 */
     { 20, -15 }, {  2, 54 },  {  3,  74 }, { 20, -15 },
@@ -211,10 +211,153 @@ static const int8_t cabac_context_init_I[460][2] =
     { -10,  73 }, { -10,  70 }, { -10,  69 }, {  -5,  66 },
     {  -9,  64 }, {  -5,  58 }, {   2,  59 }, {  21, -10 },
     {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
-    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 }
+    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 },
+
+    /* 460 - 1023 */
+    { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
+    { -12,  63 }, {  -2,  68 }, { -15,  84 }, { -13, 104 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
+    { -12,  63 }, {  -2,  68 }, { -15,  84 }, { -13, 104 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    {  -7,  93 }, { -11,  87 }, {  -3,  77 }, {  -5,  71 },
+    {  -4,  63 }, {  -4,  68 }, { -12,  84 }, {  -7,  62 },
+    {  -7,  65 }, {   8,  61 }, {   5,  56 }, {  -2,  66 },
+    {   1,  64 }, {   0,  61 }, {  -2,  78 }, {   1,  50 },
+    {   7,  52 }, {  10,  35 }, {   0,  44 }, {  11,  38 },
+    {   1,  45 }, {   0,  46 }, {   5,  44 }, {  31,  17 },
+    {   1,  51 }, {   7,  50 }, {  28,  19 }, {  16,  33 },
+    {  14,  62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
+    { -13,  91 }, { -12,  94 }, { -10,  88 }, { -16,  84 },
+    { -10,  86 }, {  -7,  83 }, { -13,  87 }, { -19,  94 },
+    {   1,  70 }, {   0,  72 }, {  -5,  74 }, {  18,  59 },
+    {  -7,  93 }, { -11,  87 }, {  -3,  77 }, {  -5,  71 },
+    {  -4,  63 }, {  -4,  68 }, { -12,  84 }, {  -7,  62 },
+    {  -7,  65 }, {   8,  61 }, {   5,  56 }, {  -2,  66 },
+    {   1,  64 }, {   0,  61 }, {  -2,  78 }, {   1,  50 },
+    {   7,  52 }, {  10,  35 }, {   0,  44 }, {  11,  38 },
+    {   1,  45 }, {   0,  46 }, {   5,  44 }, {  31,  17 },
+    {   1,  51 }, {   7,  50 }, {  28,  19 }, {  16,  33 },
+    {  14,  62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
+    { -13,  91 }, { -12,  94 }, { -10,  88 }, { -16,  84 },
+    { -10,  86 }, {  -7,  83 }, { -13,  87 }, { -19,  94 },
+    {   1,  70 }, {   0,  72 }, {  -5,  74 }, {  18,  59 },
+    {  24,   0 }, {  15,   9 }, {   8,  25 }, {  13,  18 },
+    {  15,   9 }, {  13,  19 }, {  10,  37 }, {  12,  18 },
+    {   6,  29 }, {  20,  33 }, {  15,  30 }, {   4,  45 },
+    {   1,  58 }, {   0,  62 }, {   7,  61 }, {  12,  38 },
+    {  11,  45 }, {  15,  39 }, {  11,  42 }, {  13,  44 },
+    {  16,  45 }, {  12,  41 }, {  10,  49 }, {  30,  34 },
+    {  18,  42 }, {  10,  55 }, {  17,  51 }, {  17,  46 },
+    {   0,  89 }, {  26, -19 }, {  22, -17 }, {  26, -17 },
+    {  30, -25 }, {  28, -20 }, {  33, -23 }, {  37, -27 },
+    {  33, -23 }, {  40, -28 }, {  38, -17 }, {  33, -11 },
+    {  40, -15 }, {  41,  -6 }, {  38,   1 }, {  41,  17 },
+    {  24,   0 }, {  15,   9 }, {   8,  25 }, {  13,  18 },
+    {  15,   9 }, {  13,  19 }, {  10,  37 }, {  12,  18 },
+    {   6,  29 }, {  20,  33 }, {  15,  30 }, {   4,  45 },
+    {   1,  58 }, {   0,  62 }, {   7,  61 }, {  12,  38 },
+    {  11,  45 }, {  15,  39 }, {  11,  42 }, {  13,  44 },
+    {  16,  45 }, {  12,  41 }, {  10,  49 }, {  30,  34 },
+    {  18,  42 }, {  10,  55 }, {  17,  51 }, {  17,  46 },
+    {   0,  89 }, {  26, -19 }, {  22, -17 }, {  26, -17 },
+    {  30, -25 }, {  28, -20 }, {  33, -23 }, {  37, -27 },
+    {  33, -23 }, {  40, -28 }, {  38, -17 }, {  33, -11 },
+    {  40, -15 }, {  41,  -6 }, {  38,   1 }, {  41,  17 },
+    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
+    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
+    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
+    { -23,  68 }, { -24,  50 }, { -11,  74 }, { -14, 106 },
+    { -13,  97 }, { -15,  90 }, { -12,  90 }, { -18,  88 },
+    { -10,  73 }, {  -9,  79 }, { -14,  86 }, { -10,  73 },
+    { -10,  70 }, { -10,  69 }, {  -5,  66 }, {  -9,  64 },
+    {  -5,  58 }, {   2,  59 }, {  23, -13 }, {  26, -13 },
+    {  40, -15 }, {  49, -14 }, {  44,   3 }, {  45,   6 },
+    {  44,  34 }, {  33,  54 }, {  19,  82 }, {  21, -10 },
+    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
+    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 },
+    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
+    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
+    {   0,  68 }, {  -9,  92 }, { -17, 120 }, { -20, 112 },
+    { -18, 114 }, { -11,  85 }, { -15,  92 }, { -14,  89 },
+    { -26,  71 }, { -15,  81 }, { -14,  80 }, {   0,  68 },
+    { -14,  70 }, { -24,  56 }, { -23,  68 }, { -24,  50 },
+    { -11,  74 }, { -14, 106 }, { -13,  97 }, { -15,  90 },
+    { -12,  90 }, { -18,  88 }, { -10,  73 }, {  -9,  79 },
+    { -14,  86 }, { -10,  73 }, { -10,  70 }, { -10,  69 },
+    {  -5,  66 }, {  -9,  64 }, {  -5,  58 }, {   2,  59 },
+    {  23, -13 }, {  26, -13 }, {  40, -15 }, {  49, -14 },
+    {  44,   3 }, {  45,   6 }, {  44,  34 }, {  33,  54 },
+    {  19,  82 }, {  21, -10 }, {  24, -11 }, {  28,  -8 },
+    {  28,  -1 }, {  29,   3 }, {  29,   9 }, {  35,  20 },
+    {  29,  36 }, {  14,  67 }, {  -3,  75 }, {  -1,  23 },
+    {   1,  34 }, {   1,  43 }, {   0,  54 }, {  -2,  55 },
+    {   0,  61 }, {   1,  64 }, {   0,  68 }, {  -9,  92 },
+    {  -6,  93 }, {  -6,  84 }, {  -8,  79 }, {   0,  66 },
+    {  -1,  71 }, {   0,  62 }, {  -2,  60 }, {  -2,  59 },
+    {  -5,  75 }, {  -3,  62 }, {  -4,  58 }, {  -9,  66 },
+    {  -1,  79 }, {   0,  71 }, {   3,  68 }, {  10,  44 },
+    {  -7,  62 }, {  15,  36 }, {  14,  40 }, {  16,  27 },
+    {  12,  29 }, {   1,  44 }, {  20,  36 }, {  18,  32 },
+    {   5,  42 }, {   1,  48 }, {  10,  62 }, {  17,  46 },
+    {   9,  64 }, { -12, 104 }, { -11,  97 }, { -16,  96 },
+    {  -7,  88 }, {  -8,  85 }, {  -7,  85 }, {  -9,  85 },
+    { -13,  88 }, {   4,  66 }, {  -3,  77 }, {  -3,  76 },
+    {  -6,  76 }, {  10,  58 }, {  -1,  76 }, {  -1,  83 },
+    {  -6,  93 }, {  -6,  84 }, {  -8,  79 }, {   0,  66 },
+    {  -1,  71 }, {   0,  62 }, {  -2,  60 }, {  -2,  59 },
+    {  -5,  75 }, {  -3,  62 }, {  -4,  58 }, {  -9,  66 },
+    {  -1,  79 }, {   0,  71 }, {   3,  68 }, {  10,  44 },
+    {  -7,  62 }, {  15,  36 }, {  14,  40 }, {  16,  27 },
+    {  12,  29 }, {   1,  44 }, {  20,  36 }, {  18,  32 },
+    {   5,  42 }, {   1,  48 }, {  10,  62 }, {  17,  46 },
+    {   9,  64 }, { -12, 104 }, { -11,  97 }, { -16,  96 },
+    {  -7,  88 }, {  -8,  85 }, {  -7,  85 }, {  -9,  85 },
+    { -13,  88 }, {   4,  66 }, {  -3,  77 }, {  -3,  76 },
+    {  -6,  76 }, {  10,  58 }, {  -1,  76 }, {  -1,  83 },
+    {  15,   6 }, {   6,  19 }, {   7,  16 }, {  12,  14 },
+    {  18,  13 }, {  13,  11 }, {  13,  15 }, {  15,  16 },
+    {  12,  23 }, {  13,  23 }, {  15,  20 }, {  14,  26 },
+    {  14,  44 }, {  17,  40 }, {  17,  47 }, {  24,  17 },
+    {  21,  21 }, {  25,  22 }, {  31,  27 }, {  22,  29 },
+    {  19,  35 }, {  14,  50 }, {  10,  57 }, {   7,  63 },
+    {  -2,  77 }, {  -4,  82 }, {  -3,  94 }, {   9,  69 },
+    { -12, 109 }, {  36, -35 }, {  36, -34 }, {  32, -26 },
+    {  37, -30 }, {  44, -32 }, {  34, -18 }, {  34, -15 },
+    {  40, -15 }, {  33,  -7 }, {  35,  -5 }, {  33,   0 },
+    {  38,   2 }, {  33,  13 }, {  23,  35 }, {  13,  58 },
+    {  15,   6 }, {   6,  19 }, {   7,  16 }, {  12,  14 },
+    {  18,  13 }, {  13,  11 }, {  13,  15 }, {  15,  16 },
+    {  12,  23 }, {  13,  23 }, {  15,  20 }, {  14,  26 },
+    {  14,  44 }, {  17,  40 }, {  17,  47 }, {  24,  17 },
+    {  21,  21 }, {  25,  22 }, {  31,  27 }, {  22,  29 },
+    {  19,  35 }, {  14,  50 }, {  10,  57 }, {   7,  63 },
+    {  -2,  77 }, {  -4,  82 }, {  -3,  94 }, {   9,  69 },
+    { -12, 109 }, {  36, -35 }, {  36, -34 }, {  32, -26 },
+    {  37, -30 }, {  44, -32 }, {  34, -18 }, {  34, -15 },
+    {  40, -15 }, {  33,  -7 }, {  35,  -5 }, {  33,   0 },
+    {  38,   2 }, {  33,  13 }, {  23,  35 }, {  13,  58 },
+    {  -3,  71 }, {  -6,  42 }, {  -5,  50 }, {  -3,  54 },
+    {  -2,  62 }, {   0,  58 }, {   1,  63 }, {  -2,  72 },
+    {  -1,  74 }, {  -9,  91 }, {  -5,  67 }, {  -5,  27 },
+    {  -3,  39 }, {  -2,  44 }, {   0,  46 }, { -16,  64 },
+    {  -8,  68 }, { -10,  78 }, {  -6,  77 }, { -10,  86 },
+    { -12,  92 }, { -15,  55 }, { -10,  60 }, {  -6,  62 },
+    {  -4,  65 }, { -12,  73 }, {  -8,  76 }, {  -7,  80 },
+    {  -9,  88 }, { -17, 110 }, {  -3,  71 }, {  -6,  42 },
+    {  -5,  50 }, {  -3,  54 }, {  -2,  62 }, {   0,  58 },
+    {   1,  63 }, {  -2,  72 }, {  -1,  74 }, {  -9,  91 },
+    {  -5,  67 }, {  -5,  27 }, {  -3,  39 }, {  -2,  44 },
+    {   0,  46 }, { -16,  64 }, {  -8,  68 }, { -10,  78 },
+    {  -6,  77 }, { -10,  86 }, { -12,  92 }, { -15,  55 },
+    { -10,  60 }, {  -6,  62 }, {  -4,  65 }, { -12,  73 },
+    {  -8,  76 }, {  -7,  80 }, {  -9,  88 }, { -17, 110 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 }
 };
 
-static const int8_t cabac_context_init_PB[3][460][2] =
+static const int8_t cabac_context_init_PB[3][1024][2] =
 {
     /* i_cabac_init_idc == 0 */
     {
@@ -370,6 +513,149 @@ static const int8_t cabac_context_init_PB[3][460][2] =
         { -14,  66 }, {   0,  59 }, {   2,  59 }, {  21, -13 },
         {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
         {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
+
+        /* 460 - 1024 */
+        {  -7,  92 }, {  -5,  89 }, {  -7,  96 }, { -13, 108 },
+        {  -3,  46 }, {  -1,  65 }, {  -1,  57 }, {  -9,  93 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -7,  92 }, {  -5,  89 }, {  -7,  96 }, { -13, 108 },
+        {  -3,  46 }, {  -1,  65 }, {  -1,  57 }, {  -9,  93 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
+        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
+        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
+        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
+        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
+        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
+        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
+        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
+        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
+        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
+        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
+        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
+        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
+        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
+        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
+        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
+        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
+        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
+        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
+        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
+        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
+        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
+        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
+        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
+        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
+        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
+        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
+        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
+        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
+        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
+        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
+        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
+        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
+        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
+        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
+        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
+        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
+        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
+        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
+        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
+        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
+        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
+        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
+        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
+        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
+        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
+        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
+        { -16,  66 }, { -22,  65 }, { -20,  63 }, {  -5,  85 },
+        {  -6,  81 }, { -10,  77 }, {  -7,  81 }, { -17,  80 },
+        { -18,  73 }, {  -4,  74 }, { -10,  83 }, {  -9,  71 },
+        {  -9,  67 }, {  -1,  61 }, {  -8,  66 }, { -14,  66 },
+        {   0,  59 }, {   2,  59 }, {   9,  -2 }, {  26,  -9 },
+        {  33,  -9 }, {  39,  -7 }, {  41,  -2 }, {  45,   3 },
+        {  49,   9 }, {  45,  27 }, {  36,  59 }, {  21, -13 },
+        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
+        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
+        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
+        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
+        {  -8,  66 }, {  -8,  76 }, {  -4,  79 }, {  -7,  71 },
+        {  -5,  69 }, {  -9,  70 }, {  -8,  66 }, { -10,  68 },
+        { -19,  73 }, { -12,  69 }, { -16,  70 }, { -15,  67 },
+        { -20,  62 }, { -19,  70 }, { -16,  66 }, { -22,  65 },
+        { -20,  63 }, {  -5,  85 }, {  -6,  81 }, { -10,  77 },
+        {  -7,  81 }, { -17,  80 }, { -18,  73 }, {  -4,  74 },
+        { -10,  83 }, {  -9,  71 }, {  -9,  67 }, {  -1,  61 },
+        {  -8,  66 }, { -14,  66 }, {   0,  59 }, {   2,  59 },
+        {   9,  -2 }, {  26,  -9 }, {  33,  -9 }, {  39,  -7 },
+        {  41,  -2 }, {  45,   3 }, {  49,   9 }, {  45,  27 },
+        {  36,  59 }, {  21, -13 }, {  33, -14 }, {  39,  -7 },
+        {  46,  -2 }, {  51,   2 }, {  60,   6 }, {  61,  17 },
+        {  55,  34 }, {  42,  62 }, {  -6,  66 }, {  -7,  35 },
+        {  -7,  42 }, {  -8,  45 }, {  -5,  48 }, { -12,  56 },
+        {  -6,  60 }, {  -5,  62 }, {  -8,  66 }, {  -8,  76 },
+        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
+        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
+        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
+        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
+        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
+        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
+        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
+        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
+        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
+        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
+        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
+        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
+        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
+        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
+        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
+        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
+        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
+        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
+        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
+        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
+        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
+        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
+        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
+        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
+        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
+        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
+        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
+        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
+        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
+        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
+        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
+        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
+        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
+        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
+        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
+        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
+        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
+        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
+        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
+        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
+        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
+        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
+        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
+        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
+        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
+        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
+        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
+        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
+        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
+        {  -3,  74 }, { -10,  90 }, {  -6,  76 }, {  -2,  44 },
+        {   0,  45 }, {   0,  52 }, {  -3,  64 }, {  -2,  59 },
+        {  -4,  70 }, {  -4,  75 }, {  -8,  82 }, { -17, 102 },
+        {  -9,  77 }, {   3,  24 }, {   0,  42 }, {   0,  48 },
+        {   0,  55 }, {  -6,  59 }, {  -7,  71 }, { -12,  83 },
+        { -11,  87 }, { -30, 119 }, {   1,  58 }, {  -3,  29 },
+        {  -1,  36 }, {   1,  38 }, {   2,  43 }, {  -6,  55 },
+        {   0,  58 }, {   0,  64 }, {  -3,  74 }, { -10,  90 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 }
     },
 
     /* i_cabac_init_idc == 1 */
@@ -526,6 +812,149 @@ static const int8_t cabac_context_init_PB[3][460][2] =
         {  -9,  60 }, {   1,  54 }, {   2,  58 }, {  17, -10 },
         {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
         {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+
+        /* 460 - 1024 */
+        {   0,  80 }, {  -5,  89 }, {  -7,  94 }, {  -4,  92 },
+        {   0,  39 }, {   0,  65 }, { -15,  84 }, { -35, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        {   0,  80 }, {  -5,  89 }, {  -7,  94 }, {  -4,  92 },
+        {   0,  39 }, {   0,  65 }, { -15,  84 }, { -35, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
+        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
+        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
+        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
+        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
+        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
+        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
+        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
+        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
+        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
+        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
+        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
+        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
+        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
+        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
+        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
+        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
+        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
+        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
+        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
+        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
+        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
+        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
+        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
+        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
+        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
+        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
+        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
+        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
+        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
+        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
+        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
+        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
+        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
+        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
+        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
+        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
+        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
+        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
+        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
+        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
+        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
+        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
+        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
+        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
+        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
+        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
+        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  -3,  81 },
+        {  -3,  76 }, {  -7,  72 }, {  -6,  78 }, { -12,  72 },
+        { -14,  68 }, {  -3,  70 }, {  -6,  76 }, {  -5,  66 },
+        {  -5,  62 }, {   0,  57 }, {  -4,  61 }, {  -9,  60 },
+        {   1,  54 }, {   2,  58 }, {  17, -10 }, {  32, -13 },
+        {  42,  -9 }, {  49,  -5 }, {  53,   0 }, {  64,   3 },
+        {  68,  10 }, {  66,  27 }, {  47,  57 }, {  17, -10 },
+        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
+        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
+        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
+        {  -4,  67 }, {  -7,  82 }, {  -5,  85 }, {  -6,  81 },
+        { -10,  77 }, {  -7,  81 }, { -17,  80 }, { -18,  73 },
+        {  -4,  74 }, { -10,  83 }, {  -9,  71 }, {  -9,  67 },
+        {  -1,  61 }, {  -8,  66 }, { -14,  66 }, {   0,  59 },
+        {   2,  59 }, {  -3,  81 }, {  -3,  76 }, {  -7,  72 },
+        {  -6,  78 }, { -12,  72 }, { -14,  68 }, {  -3,  70 },
+        {  -6,  76 }, {  -5,  66 }, {  -5,  62 }, {   0,  57 },
+        {  -4,  61 }, {  -9,  60 }, {   1,  54 }, {   2,  58 },
+        {  17, -10 }, {  32, -13 }, {  42,  -9 }, {  49,  -5 },
+        {  53,   0 }, {  64,   3 }, {  68,  10 }, {  66,  27 },
+        {  47,  57 }, {  17, -10 }, {  32, -13 }, {  42,  -9 },
+        {  49,  -5 }, {  53,   0 }, {  64,   3 }, {  68,  10 },
+        {  66,  27 }, {  47,  57 }, {  -5,  71 }, {   0,  24 },
+        {  -1,  36 }, {  -2,  42 }, {  -2,  52 }, {  -9,  57 },
+        {  -6,  63 }, {  -4,  65 }, {  -4,  67 }, {  -7,  82 },
+        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
+        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
+        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
+        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
+        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
+        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
+        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
+        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
+        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
+        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
+        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
+        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
+        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
+        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
+        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
+        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
+        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
+        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
+        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
+        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
+        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
+        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
+        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
+        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
+        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
+        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
+        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
+        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
+        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
+        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
+        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
+        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
+        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
+        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
+        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
+        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
+        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
+        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
+        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
+        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
+        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
+        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
+        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
+        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
+        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
+        {  -5,  74 }, {  -9,  86 }, { -23, 112 }, { -15,  71 },
+        {  -7,  61 }, {   0,  53 }, {  -5,  66 }, { -11,  77 },
+        {  -9,  80 }, {  -9,  84 }, { -10,  87 }, { -34, 127 },
+        { -21, 101 }, {  -3,  39 }, {  -5,  53 }, {  -7,  61 },
+        { -11,  75 }, { -15,  77 }, { -17,  91 }, { -25, 107 },
+        { -25, 111 }, { -28, 122 }, { -11,  76 }, { -10,  44 },
+        { -10,  52 }, { -10,  57 }, {  -9,  58 }, { -16,  72 },
+        {  -7,  69 }, {  -4,  69 }, {  -5,  74 }, {  -9,  86 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 }
     },
 
     /* i_cabac_init_idc == 2 */
@@ -682,6 +1111,149 @@ static const int8_t cabac_context_init_PB[3][460][2] =
         { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
         {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
         {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+
+        /* 460 - 1024 */
+        {  11,  80 }, {   5,  76 }, {   2,  84 }, {   5,  78 },
+        {  -6,  55 }, {   4,  61 }, { -14,  83 }, { -37, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  11,  80 }, {   5,  76 }, {   2,  84 }, {   5,  78 },
+        {  -6,  55 }, {   4,  61 }, { -14,  83 }, { -37, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
+        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
+        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
+        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
+        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
+        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
+        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
+        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
+        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
+        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
+        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
+        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
+        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
+        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
+        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
+        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
+        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
+        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
+        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
+        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
+        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
+        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
+        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
+        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
+        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
+        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
+        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
+        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
+        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
+        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
+        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
+        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
+        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
+        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
+        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
+        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
+        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
+        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
+        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
+        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
+        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
+        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
+        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
+        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
+        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
+        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
+        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
+        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {  -3,  78 },
+        {  -8,  74 }, {  -9,  72 }, { -10,  72 }, { -18,  75 },
+        { -12,  71 }, { -11,  63 }, {  -5,  70 }, { -17,  75 },
+        { -14,  72 }, { -16,  67 }, {  -8,  53 }, { -14,  59 },
+        {  -9,  52 }, { -11,  68 }, {   9,  -2 }, {  30, -10 },
+        {  31,  -4 }, {  33,  -1 }, {  33,   7 }, {  31,  12 },
+        {  37,  23 }, {  31,  38 }, {  20,  64 }, {   9,  -2 },
+        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
+        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
+        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
+        {  -6,  68 }, { -10,  79 }, {  -3,  78 }, {  -8,  74 },
+        {  -9,  72 }, { -10,  72 }, { -18,  75 }, { -12,  71 },
+        { -11,  63 }, {  -5,  70 }, { -17,  75 }, { -14,  72 },
+        { -16,  67 }, {  -8,  53 }, { -14,  59 }, {  -9,  52 },
+        { -11,  68 }, {  -3,  78 }, {  -8,  74 }, {  -9,  72 },
+        { -10,  72 }, { -18,  75 }, { -12,  71 }, { -11,  63 },
+        {  -5,  70 }, { -17,  75 }, { -14,  72 }, { -16,  67 },
+        {  -8,  53 }, { -14,  59 }, {  -9,  52 }, { -11,  68 },
+        {   9,  -2 }, {  30, -10 }, {  31,  -4 }, {  33,  -1 },
+        {  33,   7 }, {  31,  12 }, {  37,  23 }, {  31,  38 },
+        {  20,  64 }, {   9,  -2 }, {  30, -10 }, {  31,  -4 },
+        {  33,  -1 }, {  33,   7 }, {  31,  12 }, {  37,  23 },
+        {  31,  38 }, {  20,  64 }, {  -9,  71 }, {  -7,  37 },
+        {  -8,  44 }, { -11,  49 }, { -10,  56 }, { -12,  59 },
+        {  -8,  63 }, {  -9,  67 }, {  -6,  68 }, { -10,  79 },
+        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
+        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
+        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
+        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
+        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
+        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
+        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
+        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
+        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
+        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
+        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
+        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
+        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
+        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
+        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
+        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
+        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
+        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
+        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
+        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
+        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
+        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
+        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
+        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
+        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
+        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
+        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
+        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
+        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
+        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
+        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
+        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
+        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
+        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
+        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
+        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
+        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
+        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
+        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
+        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
+        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
+        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
+        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
+        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
+        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
+        { -12,  92 }, { -18, 108 }, { -24, 115 }, { -22,  82 },
+        {  -9,  62 }, {   0,  53 }, {   0,  59 }, { -14,  85 },
+        { -13,  89 }, { -13,  94 }, { -11,  92 }, { -29, 127 },
+        { -21, 100 }, { -14,  57 }, { -12,  67 }, { -11,  71 },
+        { -10,  77 }, { -21,  85 }, { -16,  88 }, { -23, 104 },
+        { -15,  98 }, { -37, 127 }, { -10,  82 }, {  -8,  48 },
+        {  -8,  61 }, {  -8,  66 }, {  -7,  70 }, { -14,  75 },
+        { -10,  79 }, {  -9,  83 }, { -12,  92 }, { -18, 108 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 }
     }
 };
 
@@ -695,7 +1267,7 @@ void ff_h264_init_cabac_states(H264Context *h) {
     else                                 tab = cabac_context_init_PB[h->cabac_init_idc];
 
     /* calculate pre-state */
-    for( i= 0; i < 460; i++ ) {
+    for( i= 0; i < 1024; i++ ) {
         int pre = 2*(((tab[i][0] * slice_qp) >>4 ) + tab[i][1]) - 127;
 
         pre^= pre>>31;
@@ -957,21 +1529,22 @@ static int decode_cabac_mb_mvd( H264Context *h, int ctxbase, int amvd, int *mvda
     my += decode_cabac_mb_mvd( h, 47, amvd1, &mpy );\
 }
 
-static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
+static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int max_coeff, int is_dc ) {
     int nza, nzb;
     int ctx = 0;
+    static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020};
 
     if( is_dc ) {
-        if( cat == 0 ) {
-            nza = h->left_cbp&0x100;
-            nzb = h-> top_cbp&0x100;
-        } else {
+        if( cat == 3 ) {
             idx -= CHROMA_DC_BLOCK_INDEX;
             nza = (h->left_cbp>>(6+idx))&0x01;
             nzb = (h-> top_cbp>>(6+idx))&0x01;
+        } else {
+            idx -= LUMA_DC_BLOCK_INDEX;
+            nza = h->left_cbp&(0x100<<idx);
+            nzb = h-> top_cbp&(0x100<<idx);
         }
     } else {
-        assert(cat == 1 || cat == 2 || cat == 4);
         nza = h->non_zero_count_cache[scan8[idx] - 1];
         nzb = h->non_zero_count_cache[scan8[idx] - 8];
     }
@@ -982,7 +1555,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
     if( nzb > 0 )
         ctx += 2;
 
-    return ctx + 4 * cat;
+    return base_ctx[cat] + ctx;
 }
 
 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
@@ -993,16 +1566,16 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
 };
 
 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
-    static const int significant_coeff_flag_offset[2][6] = {
-      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
-      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
+    static const int significant_coeff_flag_offset[2][14] = {
+      { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
+      { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
     };
-    static const int last_coeff_flag_offset[2][6] = {
-      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
-      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
+    static const int last_coeff_flag_offset[2][14] = {
+      { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748 },
+      { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757 }
     };
-    static const int coeff_abs_level_m1_offset[6] = {
-        227+0, 227+10, 227+20, 227+30, 227+39, 426
+    static const int coeff_abs_level_m1_offset[14] = {
+        227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
     };
     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
@@ -1057,7 +1630,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
     abs_level_m1_ctx_base = h->cabac_state
         + coeff_abs_level_m1_offset[cat];
 
-    if( !is_dc && cat == 5 ) {
+    if( !is_dc && max_coeff == 64 ) {
 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
         for(last= 0; last < coefs; last++) { \
             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
@@ -1075,9 +1648,11 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
         }
         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
-        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
+        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
+                                                 last_coeff_ctx_base-significant_coeff_ctx_base, sig_off);
     } else {
-        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
+        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
+                                             last_coeff_ctx_base-significant_coeff_ctx_base);
 #else
         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
     } else {
@@ -1087,16 +1662,16 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
     assert(coeff_count > 0);
 
     if( is_dc ) {
-        if( cat == 0 )
-            h->cbp_table[h->mb_xy] |= 0x100;
-        else
+        if( cat == 3 )
             h->cbp_table[h->mb_xy] |= 0x40 << (n - CHROMA_DC_BLOCK_INDEX);
+        else
+            h->cbp_table[h->mb_xy] |= 0x100 << (n - LUMA_DC_BLOCK_INDEX);
         h->non_zero_count_cache[scan8[n]] = coeff_count;
     } else {
-        if( cat == 5 )
+        if( max_coeff == 64 )
             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
         else {
-            assert( cat == 1 || cat == 2 || cat == 4 );
+            assert( cat == 1 || cat ==  2 || cat ==  4 || cat == 7 || cat == 8 || cat == 11 || cat == 12 );
             h->non_zero_count_cache[scan8[n]] = coeff_count;
         }
     }
@@ -1179,7 +1754,7 @@ static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block
 
 static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
     /* read coded block flag */
-    if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, 1 ) ] ) == 0 ) {
+    if( get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 1 ) ] ) == 0 ) {
         h->non_zero_count_cache[scan8[n]] = 0;
         return;
     }
@@ -1188,13 +1763,68 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *
 
 static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
     /* read coded block flag */
-    if( cat != 5 && get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, 0 ) ] ) == 0 ) {
-        h->non_zero_count_cache[scan8[n]] = 0;
+    if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
+        if( max_coeff == 64 ) {
+            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 0, 1);
+        } else {
+            h->non_zero_count_cache[scan8[n]] = 0;
+        }
         return;
     }
     decode_cabac_residual_nondc_internal( h, block, cat, n, scantable, qmul, max_coeff );
 }
 
+static av_always_inline void decode_cabac_luma_residual( H264Context *h, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p )
+{
+    static const uint8_t ctx_cat[4][3] = {{0,6,10},{1,7,11},{2,8,12},{5,9,13}};
+    const uint32_t *qmul;
+    int i8x8, i4x4;
+    MpegEncContext * const s = &h->s;
+    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
+    if( IS_INTRA16x16( mb_type ) ) {
+        //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
+        AV_ZERO128(h->mb_luma_dc[p]+0);
+        AV_ZERO128(h->mb_luma_dc[p]+8);
+        AV_ZERO128(h->mb_luma_dc[p]+16);
+        AV_ZERO128(h->mb_luma_dc[p]+24);
+        decode_cabac_residual_dc(h, h->mb_luma_dc[p], ctx_cat[0][p], LUMA_DC_BLOCK_INDEX+p, scan, 16);
+
+        if( cbp&15 ) {
+            qmul = h->dequant4_coeff[p][qscale];
+            for( i4x4 = 0; i4x4 < 16; i4x4++ ) {
+                const int index = 16*p + i4x4;
+                //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", index );
+                decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[1][p], index, scan + 1, qmul, 15);
+            }
+        } else {
+            fill_rectangle(&h->non_zero_count_cache[scan8[16*p]], 4, 4, 8, 0, 1);
+        }
+    } else {
+        int cqm = (IS_INTRA( mb_type ) ? 0:3) + p;
+        for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
+            if( cbp & (1<<i8x8) ) {
+                if( IS_8x8DCT(mb_type) ) {
+                    const int index = 16*p + 4*i8x8;
+                    decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[3][p], index,
+                                                scan8x8, h->dequant8_coeff[cqm][qscale], 64);
+                } else {
+                    qmul = h->dequant4_coeff[cqm][qscale];
+                    for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
+                        const int index = 16*p + 4*i8x8 + i4x4;
+                        //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
+//START_TIMER
+                        decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[2][p], index, scan, qmul, 16);
+//STOP_TIMER("decode_residual")
+                    }
+                }
+            } else {
+                uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+16*p] ];
+                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+            }
+        }
+    }
+}
+
 /**
  * decodes a macroblock
  * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
@@ -1204,6 +1834,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
     int mb_xy;
     int mb_type, partition_count, cbp = 0;
     int dct8x8_allowed= h->pps.transform_8x8_mode;
+    int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -1313,7 +1944,8 @@ decode_intra_mb:
     h->slice_table[ mb_xy ]= h->slice_num;
 
     if(IS_INTRA_PCM(mb_type)) {
-        const int mb_size = (384*h->sps.bit_depth_luma) >> 3;
+        static const uint16_t mb_sizes[4] = {256,384,512,768};
+        const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
         const uint8_t *ptr;
 
         // We assume these blocks are very rare so we do not optimize it.
@@ -1326,20 +1958,17 @@ decode_intra_mb:
         }
 
         // The pixels are stored in the same order as levels in h->mb array.
-        memcpy(h->mb, ptr, 2*mb_size/3); ptr+=2*mb_size/3;
-        if(CHROMA){
-            memcpy(h->mb+mb_size/3, ptr, mb_size/3); ptr+=mb_size/3;
-        }
+        memcpy(h->mb, ptr, mb_size); ptr+=mb_size;
 
         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
 
         // All blocks are present
-        h->cbp_table[mb_xy] = 0x1ef;
+        h->cbp_table[mb_xy] = 0xf7ef;
         h->chroma_pred_mode_table[mb_xy] = 0;
         // In deblocking, the quantizer is 0
         s->current_picture.qscale_table[mb_xy]= 0;
         // All coeffs are present
-        memset(h->non_zero_count[mb_xy], 16, 32);
+        memset(h->non_zero_count[mb_xy], 16, 48);
         s->current_picture.mb_type[mb_xy]= mb_type;
         h->last_qscale_diff = 0;
         return 0;
@@ -1376,7 +2005,7 @@ decode_intra_mb:
             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode( h, h->intra16x16_pred_mode );
             if( h->intra16x16_pred_mode < 0 ) return -1;
         }
-        if(CHROMA){
+        if(decode_chroma){
             h->chroma_pred_mode_table[mb_xy] =
             pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
 
@@ -1605,7 +2234,7 @@ decode_intra_mb:
 
     if( !IS_INTRA16x16( mb_type ) ) {
         cbp  = decode_cabac_mb_cbp_luma( h );
-        if(CHROMA)
+        if(decode_chroma)
             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
     }
 
@@ -1614,6 +2243,28 @@ decode_intra_mb:
     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
         mb_type |= MB_TYPE_8x8DCT * get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
     }
+
+    /* It would be better to do this in fill_decode_caches, but we don't know
+     * the transform mode of the current macroblock there. */
+    if (CHROMA444 && IS_8x8DCT(mb_type)){
+        int i;
+        for (i = 0; i < 2; i++){
+            if (h->left_type[i] && !IS_8x8DCT(h->left_type[i])){
+                h->non_zero_count_cache[3+8* 1 + 2*8*i]=
+                h->non_zero_count_cache[3+8* 2 + 2*8*i]=
+                h->non_zero_count_cache[3+8* 6 + 2*8*i]=
+                h->non_zero_count_cache[3+8* 7 + 2*8*i]=
+                h->non_zero_count_cache[3+8*11 + 2*8*i]=
+                h->non_zero_count_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+            }
+        }
+        if (h->top_type && !IS_8x8DCT(h->top_type)){
+            uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
+            AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
+            AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
+            AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
+        }
+    }
     s->current_picture.mb_type[mb_xy]= mb_type;
 
     if( cbp || IS_INTRA16x16( mb_type ) ) {
@@ -1658,76 +2309,38 @@ decode_intra_mb:
         }else
             h->last_qscale_diff=0;
 
-        if( IS_INTRA16x16( mb_type ) ) {
-            int i;
-            //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
-            AV_ZERO128(h->mb_luma_dc+0);
-            AV_ZERO128(h->mb_luma_dc+8);
-            AV_ZERO128(h->mb_luma_dc+16);
-            AV_ZERO128(h->mb_luma_dc+24);
-            decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
+        decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 0);
+        if(CHROMA444){
+            decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
+            decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2);
+        } else {
+            if( cbp&0x30 ){
+                int c;
+                for( c = 0; c < 2; c++ ) {
+                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
+                    decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
+                }
+            }
 
-            if( cbp&15 ) {
-                qmul = h->dequant4_coeff[0][s->qscale];
-                for( i = 0; i < 16; i++ ) {
-                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
-                    decode_cabac_residual_nondc(h, h->mb + (16*i << pixel_shift), 1, i, scan + 1, qmul, 15);
+            if( cbp&0x20 ) {
+                int c, i;
+                for( c = 0; c < 2; c++ ) {
+                    qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
+                    for( i = 0; i < 4; i++ ) {
+                        const int index = 16 + 16 * c + i;
+                        //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
+                        decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
+                    }
                 }
             } else {
-                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
             }
-        } else {
-            int i8x8, i4x4;
-            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
-                if( cbp & (1<<i8x8) ) {
-                    if( IS_8x8DCT(mb_type) ) {
-                        decode_cabac_residual_nondc(h, h->mb + (64*i8x8 << pixel_shift), 5, 4*i8x8,
-                            scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
-                    } else {
-                        qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
-                        for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
-                            const int index = 4*i8x8 + i4x4;
-                            //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
-//START_TIMER
-                            decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 2, index, scan, qmul, 16);
-//STOP_TIMER("decode_residual")
-                        }
-                    }
-                } else {
-                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
-                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
-                }
-            }
-        }
-
-        if( cbp&0x30 ){
-            int c;
-            for( c = 0; c < 2; c++ ) {
-                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
-                decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
-            }
-        }
-
-        if( cbp&0x20 ) {
-            int c, i;
-            for( c = 0; c < 2; c++ ) {
-                qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
-                for( i = 0; i < 4; i++ ) {
-                    const int index = 16 + 4 * c + i;
-                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
-                    decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
-                }
-            }
-        } else {
-            uint8_t * const nnz= &h->non_zero_count_cache[0];
-            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
-            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
         }
     } else {
-        uint8_t * const nnz= &h->non_zero_count_cache[0];
-        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
-        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
-        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
+        fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
+        fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
         h->last_qscale_diff = 0;
     }
 
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 2e5ea54679..497166b423 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -371,12 +371,12 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
 
     //FIXME put trailing_onex into the context
 
-    if(n >= CHROMA_DC_BLOCK_INDEX){
+    if(max_coeff <= 8){
         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
         total_coeff= coeff_token>>2;
     }else{
-        if(n == LUMA_DC_BLOCK_INDEX){
-            total_coeff= pred_non_zero_count(h, 0);
+        if(n >= LUMA_DC_BLOCK_INDEX){
+            total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
             total_coeff= coeff_token>>2;
         }else{
@@ -482,7 +482,8 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
     if(total_coeff == max_coeff)
         zeros_left=0;
     else{
-        if(n >= CHROMA_DC_BLOCK_INDEX)
+        /* FIXME: we don't actually support 4:2:2 yet. */
+        if(max_coeff <= 8)
             zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
         else
             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
@@ -536,12 +537,80 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
     return 0;
 }
 
+static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
+    int i4x4, i8x8;
+    MpegEncContext * const s = &h->s;
+    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
+    if(IS_INTRA16x16(mb_type)){
+        AV_ZERO128(h->mb_luma_dc[p]+0);
+        AV_ZERO128(h->mb_luma_dc[p]+8);
+        AV_ZERO128(h->mb_luma_dc[p]+16);
+        AV_ZERO128(h->mb_luma_dc[p]+24);
+        if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
+            return -1; //FIXME continue if partitioned and other return -1 too
+        }
+
+        assert((cbp&15) == 0 || (cbp&15) == 15);
+
+        if(cbp&15){
+            for(i8x8=0; i8x8<4; i8x8++){
+                for(i4x4=0; i4x4<4; i4x4++){
+                    const int index= i4x4 + 4*i8x8 + p*16;
+                    if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
+                        index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
+                        return -1;
+                    }
+                }
+            }
+            return 0xf;
+        }else{
+            fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
+            return 0;
+        }
+    }else{
+        int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
+        /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
+        int new_cbp = 0;
+        for(i8x8=0; i8x8<4; i8x8++){
+            if(cbp & (1<<i8x8)){
+                if(IS_8x8DCT(mb_type)){
+                    DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
+                    uint8_t *nnz;
+                    for(i4x4=0; i4x4<4; i4x4++){
+                        const int index= i4x4 + 4*i8x8 + p*16;
+                        if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
+                                            h->dequant8_coeff[cqm][qscale], 16) < 0 )
+                            return -1;
+                    }
+                    nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
+                    nnz[0] += nnz[1] + nnz[8] + nnz[9];
+                    new_cbp |= !!nnz[0] << i8x8;
+                }else{
+                    for(i4x4=0; i4x4<4; i4x4++){
+                        const int index= i4x4 + 4*i8x8 + p*16;
+                        if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
+                                            scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
+                            return -1;
+                        }
+                        new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
+                    }
+                }
+            }else{
+                uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
+                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+            }
+        }
+        return new_cbp;
+    }
+}
+
 int ff_h264_decode_mb_cavlc(H264Context *h){
     MpegEncContext * const s = &h->s;
     int mb_xy;
     int partition_count;
     unsigned int mb_type, cbp;
     int dct8x8_allowed= h->pps.transform_8x8_mode;
+    int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -608,19 +677,21 @@ decode_intra_mb:
 
     if(IS_INTRA_PCM(mb_type)){
         unsigned int x;
+        static const uint16_t mb_sizes[4] = {256,384,512,768};
+        const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
 
         // We assume these blocks are very rare so we do not optimize it.
         align_get_bits(&s->gb);
 
         // The pixels are stored in the same order as levels in h->mb array.
-        for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
+        for(x=0; x < mb_size; x++){
             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
         }
 
         // In deblocking, the quantizer is 0
         s->current_picture.qscale_table[mb_xy]= 0;
         // All coeffs are present
-        memset(h->non_zero_count[mb_xy], 16, 32);
+        memset(h->non_zero_count[mb_xy], 16, 48);
 
         s->current_picture.mb_type[mb_xy]= mb_type;
         return 0;
@@ -668,7 +739,7 @@ decode_intra_mb:
             if(h->intra16x16_pred_mode < 0)
                 return -1;
         }
-        if(CHROMA){
+        if(decode_chroma){
             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
             if(pred_mode < 0)
                 return -1;
@@ -896,15 +967,19 @@ decode_intra_mb:
 
     if(!IS_INTRA16x16(mb_type)){
         cbp= get_ue_golomb(&s->gb);
-        if(cbp > 47){
-            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
-            return -1;
-        }
 
-        if(CHROMA){
+        if(decode_chroma){
+            if(cbp > 47){
+                av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
+                return -1;
+            }
             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
             else                     cbp= golomb_to_inter_cbp   [cbp];
         }else{
+            if(cbp > 15){
+                av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
+                return -1;
+            }
             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
             else                     cbp= golomb_to_inter_cbp_gray[cbp];
         }
@@ -918,8 +993,9 @@ decode_intra_mb:
     s->current_picture.mb_type[mb_xy]= mb_type;
 
     if(cbp || IS_INTRA16x16(mb_type)){
-        int i8x8, i4x4, chroma_idx;
+        int i4x4, chroma_idx;
         int dquant;
+        int ret;
         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
         const uint8_t *scan, *scan8x8;
         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
@@ -947,85 +1023,45 @@ decode_intra_mb:
 
         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
-        if(IS_INTRA16x16(mb_type)){
-            AV_ZERO128(h->mb_luma_dc+0);
-            AV_ZERO128(h->mb_luma_dc+8);
-            AV_ZERO128(h->mb_luma_dc+16);
-            AV_ZERO128(h->mb_luma_dc+24);
-            if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
-                return -1; //FIXME continue if partitioned and other return -1 too
+
+        if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
+            return -1;
+        }
+        h->cbp_table[mb_xy] |= ret << 12;
+        if(CHROMA444){
+            if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
+                return -1;
+            }
+            if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
+                return -1;
+            }
+        } else {
+            if(cbp&0x30){
+                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
+                    if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
+                        return -1;
+                    }
             }
 
-            assert((cbp&15) == 0 || (cbp&15) == 15);
-
-            if(cbp&15){
-                for(i8x8=0; i8x8<4; i8x8++){
+            if(cbp&0x20){
+                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
+                    const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
                     for(i4x4=0; i4x4<4; i4x4++){
-                        const int index= i4x4 + 4*i8x8;
-                        if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
+                        const int index= 16 + 16*chroma_idx + i4x4;
+                        if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
                             return -1;
                         }
                     }
                 }
             }else{
-                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
             }
-        }else{
-            for(i8x8=0; i8x8<4; i8x8++){
-                if(cbp & (1<<i8x8)){
-                    if(IS_8x8DCT(mb_type)){
-                        DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift];
-                        uint8_t *nnz;
-                        for(i4x4=0; i4x4<4; i4x4++){
-                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
-                                                h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
-                                return -1;
-                        }
-                        nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
-                        nnz[0] += nnz[1] + nnz[8] + nnz[9];
-                    }else{
-                        for(i4x4=0; i4x4<4; i4x4++){
-                            const int index= i4x4 + 4*i8x8;
-
-                            if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
-                                return -1;
-                            }
-                        }
-                    }
-                }else{
-                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
-                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
-                }
-            }
-        }
-
-        if(cbp&0x30){
-            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
-                    return -1;
-                }
-        }
-
-        if(cbp&0x20){
-            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
-                const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
-                for(i4x4=0; i4x4<4; i4x4++){
-                    const int index= 16 + 4*chroma_idx + i4x4;
-                    if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
-                        return -1;
-                    }
-                }
-            }
-        }else{
-            uint8_t * const nnz= &h->non_zero_count_cache[0];
-            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
-            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
         }
     }else{
-        uint8_t * const nnz= &h->non_zero_count_cache[0];
-        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
-        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
-        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
+        fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
+        fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
     }
     s->current_picture.qscale_table[mb_xy]= s->qscale;
     write_back_non_zero_count(h);
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 72b1905936..d4ecefcf08 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -220,7 +220,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
 
     mb_xy = h->mb_xy;
 
-    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
+    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff || CHROMA444) {
         ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
         return;
     }
@@ -353,9 +353,10 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
     return v;
 }
 
-static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
+static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma444, int dir) {
     MpegEncContext * const s = &h->s;
     int edge;
+    int chroma_qp_avg[2];
     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
     const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type;
 
@@ -394,7 +395,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                         bS[2]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+2]);
                         bS[3]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+3]);
                     }else{
-                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8;
+                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 3*4;
                     int i;
                     for( i = 0; i < 4; i++ ) {
                         bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]);
@@ -407,10 +408,15 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
                 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
                 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h );
-                filter_mb_edgech( &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
-                                ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h);
-                filter_mb_edgech( &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
-                                ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h);
+                chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                if (chroma444) {
+                    filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
+                    filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
+                } else {
+                    filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
+                    filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
+                }
             }
         }else{
             DECLARE_ALIGNED(8, int16_t, bS)[4];
@@ -465,23 +471,29 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]);
                 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
                 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
+                chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
+                chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
                 if( dir == 0 ) {
                     filter_mb_edgev( &img_y[0], linesize, bS, qp, h );
                     {
-                        int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
-                        filter_mb_edgecv( &img_cb[0], uvlinesize, bS, qp, h);
-                        if(h->pps.chroma_qp_diff)
-                            qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
-                        filter_mb_edgecv( &img_cr[0], uvlinesize, bS, qp, h);
+                        if (chroma444) {
+                            filter_mb_edgev ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
+                            filter_mb_edgev ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
+                        } else {
+                            filter_mb_edgecv( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
+                            filter_mb_edgecv( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
+                        }
                     }
                 } else {
                     filter_mb_edgeh( &img_y[0], linesize, bS, qp, h );
                     {
-                        int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
-                        filter_mb_edgech( &img_cb[0], uvlinesize, bS, qp, h);
-                        if(h->pps.chroma_qp_diff)
-                            qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
-                        filter_mb_edgech( &img_cr[0], uvlinesize, bS, qp, h);
+                        if (chroma444) {
+                            filter_mb_edgeh ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
+                            filter_mb_edgeh ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
+                        } else {
+                            filter_mb_edgech( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
+                            filter_mb_edgech( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
+                        }
                     }
                 }
             }
@@ -545,13 +557,19 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
         //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
         if( dir == 0 ) {
             filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
-            if( (edge&1) == 0 ) {
+            if (chroma444) {
+                filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
+                filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
+            } else if( (edge&1) == 0 ) {
                 filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
                 filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
             }
         } else {
             filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
-            if( (edge&1) == 0 ) {
+            if (chroma444) {
+                filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
+                filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
+            } else if( (edge&1) == 0 ) {
                 filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
                 filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
             }
@@ -589,11 +607,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
         } else {
             static const uint8_t offset[2][2][8]={
                 {
-                    {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1},
-                    {7+8*2, 7+8*2, 7+8*2, 7+8*2, 7+8*3, 7+8*3, 7+8*3, 7+8*3},
+                    {3+4*0, 3+4*0, 3+4*0, 3+4*0, 3+4*1, 3+4*1, 3+4*1, 3+4*1},
+                    {3+4*2, 3+4*2, 3+4*2, 3+4*2, 3+4*3, 3+4*3, 3+4*3, 3+4*3},
                 },{
-                    {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3},
-                    {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3},
+                    {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
+                    {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
                 }
             };
             const uint8_t *off= offset[MB_FIELD][mb_y&1];
@@ -650,9 +668,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
 
 #if CONFIG_SMALL
     for( dir = 0; dir < 2; dir++ )
-        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
+        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, CHROMA444, dir);
 #else
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, CHROMA444, 0);
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, CHROMA444, 1);
 #endif
 }
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index a98f14aaf6..9c41e4ca73 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -269,7 +269,7 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_s
         fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
         fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
         fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
-        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
+        fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1]
     };
     if(get_bits1(&s->gb)){
         sps->scaling_matrix_present |= is_sps;
@@ -281,7 +281,15 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_s
         decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
         if(is_sps || pps->transform_8x8_mode){
             decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
-            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
+            if(h->sps.chroma_format_idc == 3){
+                decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[0],scaling_matrix8[0]);  // Intra, Cr
+                decode_scaling_list(h,scaling_matrix8[2],64,default_scaling8[0],scaling_matrix8[1]);  // Intra, Cb
+            }
+            decode_scaling_list(h,scaling_matrix8[3],64,default_scaling8[1],fallback[3]);  // Inter, Y
+            if(h->sps.chroma_format_idc == 3){
+                decode_scaling_list(h,scaling_matrix8[4],64,default_scaling8[1],scaling_matrix8[3]);  // Inter, Cr
+                decode_scaling_list(h,scaling_matrix8[5],64,default_scaling8[1],scaling_matrix8[4]);  // Inter, Cb
+            }
         }
     }
 }
@@ -395,7 +403,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
         if(sps->crop_left || sps->crop_top){
             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
         }
-        if(sps->crop_right >= 8 || sps->crop_bottom >= 8){
+        if(sps->crop_right >= (8<<CHROMA444) || sps->crop_bottom >= (8<<CHROMA444)){
             av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
         }
     }else{
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 864c118bb5..6972725781 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -66,10 +66,10 @@ typedef struct H264DSPContext{
     void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
     void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
 
-    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
-    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
-    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
-    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
+    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
+    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
+    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
     void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul);
     void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
 }H264DSPContext;
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index 39c9a1c9eb..e7f9af7fb0 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -30,15 +30,19 @@
 #ifndef AVCODEC_H264IDCT_INTERNAL_H
 #define AVCODEC_H264IDCT_INTERNAL_H
 //FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
-static const uint8_t scan8[16 + 2*4]={
- 4+1*8, 5+1*8, 4+2*8, 5+2*8,
- 6+1*8, 7+1*8, 6+2*8, 7+2*8,
- 4+3*8, 5+3*8, 4+4*8, 5+4*8,
- 6+3*8, 7+3*8, 6+4*8, 7+4*8,
- 1+1*8, 2+1*8,
- 1+2*8, 2+2*8,
- 1+4*8, 2+4*8,
- 1+5*8, 2+5*8,
+static const uint8_t scan8[16*3]={
+ 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
+ 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
+ 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
+ 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
+ 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
+ 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
+ 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
+ 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
+ 4+11*8, 5+11*8, 4+12*8, 5+12*8,
+ 6+11*8, 7+11*8, 6+12*8, 7+12*8,
+ 4+13*8, 5+13*8, 4+14*8, 5+14*8,
+ 6+13*8, 7+13*8, 6+14*8, 7+14*8
 };
 #endif
 
@@ -190,7 +194,7 @@ void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
     }
 }
 
-void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
     int i;
     for(i=0; i<16; i++){
         int nnz = nnzc[ scan8[i] ];
@@ -201,7 +205,7 @@ void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *b
     }
 }
 
-void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
     int i;
     for(i=0; i<16; i++){
         if(nnzc[ scan8[i] ])             FUNCC(idct_internal      )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1);
@@ -209,7 +213,7 @@ void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTEL
     }
 }
 
-void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
     int i;
     for(i=0; i<16; i+=4){
         int nnz = nnzc[ scan8[i] ];
@@ -220,13 +224,15 @@ void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *b
     }
 }
 
-void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
-    int i;
-    for(i=16; i<16+8; i++){
-        if(nnzc[ scan8[i] ])
-            FUNCC(ff_h264_idct_add   )(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
-        else if(((dctcoef*)block)[i*16])
-            FUNCC(ff_h264_idct_dc_add)(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
+void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
+    int i, j;
+    for(j=1; j<3; j++){
+        for(i=j*16; i<j*16+4; i++){
+            if(nnzc[ scan8[i] ])
+                FUNCC(ff_h264_idct_add   )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
+            else if(((dctcoef*)block)[i*16])
+                FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
+        }
     }
 }
 /**
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 6a45da8761..4978d28b49 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1185,15 +1185,17 @@ void MPV_frame_end(MpegEncContext *s)
        && s->current_picture.reference
        && !s->intra_only
        && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
+            int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w;
+            int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h;
             s->dsp.draw_edges(s->current_picture.data[0], s->linesize  ,
-                              s->h_edge_pos   , s->v_edge_pos   ,
-                              EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
+                              s->h_edge_pos             , s->v_edge_pos,
+                              EDGE_WIDTH        , EDGE_WIDTH        , EDGE_TOP | EDGE_BOTTOM);
             s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize,
-                              s->h_edge_pos>>1, s->v_edge_pos>>1,
-                              EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                              s->h_edge_pos>>hshift, s->v_edge_pos>>vshift,
+                              EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, EDGE_TOP | EDGE_BOTTOM);
             s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize,
-                              s->h_edge_pos>>1, s->v_edge_pos>>1,
-                              EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                              s->h_edge_pos>>hshift, s->v_edge_pos>>vshift,
+                              EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, EDGE_TOP | EDGE_BOTTOM);
     }
 
     emms_c();
@@ -2284,14 +2286,19 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
        && !s->intra_only
        && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
         int sides = 0, edge_h;
+        int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w;
+        int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h;
         if (y==0) sides |= EDGE_TOP;
         if (y + h >= s->v_edge_pos) sides |= EDGE_BOTTOM;
 
         edge_h= FFMIN(h, s->v_edge_pos - y);
 
-        s->dsp.draw_edges(s->current_picture_ptr->data[0] +  y    *s->linesize  , s->linesize  , s->h_edge_pos   , edge_h   , EDGE_WIDTH  , sides);
-        s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides);
-        s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[0] +  y         *s->linesize  , s->linesize,
+                          s->h_edge_pos        , edge_h        , EDGE_WIDTH        , EDGE_WIDTH        , sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>vshift)*s->uvlinesize, s->uvlinesize,
+                          s->h_edge_pos>>hshift, edge_h>>hshift, EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>vshift)*s->uvlinesize, s->uvlinesize,
+                          s->h_edge_pos>>hshift, edge_h>>hshift, EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, sides);
     }
 
     h= FFMIN(h, s->avctx->height - y);
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index fae0674720..05fae831c9 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -527,7 +527,7 @@ static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int strid
     h264_idct_dc_add_internal(dst, block, stride, 8);
 }
 
-static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
     int i;
     for(i=0; i<16; i++){
         int nnz = nnzc[ scan8[i] ];
@@ -538,7 +538,7 @@ static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DC
     }
 }
 
-static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
     int i;
     for(i=0; i<16; i++){
         if(nnzc[ scan8[i] ]) ff_h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
@@ -546,7 +546,7 @@ static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offse
     }
 }
 
-static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
     int i;
     for(i=0; i<16; i+=4){
         int nnz = nnzc[ scan8[i] ];
@@ -557,13 +557,15 @@ static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DC
     }
 }
 
-static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
-    int i;
-    for(i=16; i<16+8; i++){
-        if(nnzc[ scan8[i] ])
-            ff_h264_idct_add_altivec(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
-        else if(block[i*16])
-            h264_idct_dc_add_altivec(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
+    int i, j;
+    for (j = 1; j < 3; j++) {
+        for(i = j * 16; i < j * 16 + 4; i++){
+            if(nnzc[ scan8[i] ])
+                ff_h264_idct_add_altivec(dest[j-1] + block_offset[i], block + i*16, stride);
+            else if(block[i*16])
+                h264_idct_dc_add_altivec(dest[j-1] + block_offset[i], block + i*16, stride);
+        }
     }
 }
 
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 6db0b290ba..28f04f119b 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -1978,13 +1978,13 @@ static int frame_start(SnowContext *s){
     if(s->current_picture.data[0]){
         s->dsp.draw_edges(s->current_picture.data[0],
                           s->current_picture.linesize[0], w   , h   ,
-                          EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
+                          EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
         s->dsp.draw_edges(s->current_picture.data[1],
                           s->current_picture.linesize[1], w>>1, h>>1,
-                          EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                          EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
         s->dsp.draw_edges(s->current_picture.data[2],
                           s->current_picture.linesize[2], w>>1, h>>1,
-                          EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                          EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
     }
 
     release_buffer(s->avctx);
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 1cc6991666..214c6a3945 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -784,7 +784,7 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
 
 /* draw the edges of width 'w' of an image of size width, height
    this mmx version can only handle w==8 || w==16 */
-static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int sides)
+static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides)
 {
     uint8_t *ptr, *last_line;
     int i;
@@ -839,7 +839,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w,
 
     /* top and bottom (and hopefully also the corners) */
     if (sides&EDGE_TOP) {
-        for(i = 0; i < w; i += 4) {
+        for(i = 0; i < h; i += 4) {
             ptr= buf - (i + 1) * wrap - w;
             __asm__ volatile(
                     "1:                             \n\t"
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index c850dc2ef3..e2dffe1e46 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -36,7 +36,7 @@
 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
 static int decode_significance_x86(CABACContext *c, int max_coeff,
                                    uint8_t *significant_coeff_ctx_base,
-                                   int *index){
+                                   int *index, x86_reg last_off){
     void *end= significant_coeff_ctx_base + max_coeff - 1;
     int minusstart= -(int)significant_coeff_ctx_base;
     int minusindex= 4-(int)index;
@@ -52,10 +52,12 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
         "test $1, %%edx                         \n\t"
         " jz 3f                                 \n\t"
+        "add  %7, %1                            \n\t"
 
-        BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx",
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx",
                              "%%bx", "%%esi", "%%eax", "%%al")
 
+        "sub  %7, %1                            \n\t"
         "mov  %2, %%"REG_a"                     \n\t"
         "movl %4, %%ecx                         \n\t"
         "add  %1, %%"REG_c"                     \n\t"
@@ -82,7 +84,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
         "movl %%esi, "RANGE    "(%3)            \n\t"
         "movl %%ebx, "LOW      "(%3)            \n\t"
         :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)
-        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)
+        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off)
         : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
     );
     return coeff_count;
@@ -90,7 +92,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
 static int decode_significance_8x8_x86(CABACContext *c,
                                        uint8_t *significant_coeff_ctx_base,
-                                       int *index, const uint8_t *sig_off){
+                                       int *index, x86_reg last_off, const uint8_t *sig_off){
     int minusindex= 4-(int)index;
     int coeff_count;
     x86_reg last=0;
@@ -114,8 +116,9 @@ static int decode_significance_8x8_x86(CABACContext *c,
 
         "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
         "add %5, %%"REG_D"                      \n\t"
+        "add %7, %%"REG_D"                      \n\t"
 
-        BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx",
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx",
                              "%%bx", "%%esi", "%%eax", "%%al")
 
         "mov %2, %%"REG_a"                      \n\t"
@@ -142,7 +145,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "movl %%esi, "RANGE    "(%3)            \n\t"
         "movl %%ebx, "LOW      "(%3)            \n\t"
         :"=&a"(coeff_count),"+m"(last), "+m"(index)
-        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)
+        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off)
         : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"
     );
     return coeff_count;
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index f90f41c4bc..4788da98e0 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -32,14 +32,18 @@
 SECTION_RODATA
 
 ; FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
-scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
-           db 6+1*8, 7+1*8, 6+2*8, 7+2*8
-           db 4+3*8, 5+3*8, 4+4*8, 5+4*8
-           db 6+3*8, 7+3*8, 6+4*8, 7+4*8
-           db 1+1*8, 2+1*8
-           db 1+2*8, 2+2*8
-           db 1+4*8, 2+4*8
-           db 1+5*8, 2+5*8
+scan8_mem: db  4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
+           db  6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
+           db  4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
+           db  6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
+           db  4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
+           db  6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
+           db  4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
+           db  6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
+           db  4+11*8, 5+11*8, 4+12*8, 5+12*8
+           db  6+11*8, 7+11*8, 6+12*8, 7+12*8
+           db  4+13*8, 5+13*8, 4+14*8, 5+14*8
+           db  6+13*8, 7+13*8, 6+14*8, 7+14*8
 %ifdef PIC
 %define scan8 r11
 %else
@@ -617,6 +621,8 @@ cglobal h264_idct_add8_8_mmx, 5, 7, 0
     mov         r10, r0
 %endif
     call         h264_idct_add8_mmx_plane
+    mov          r5, 32
+    add          r2, 384
 %ifdef ARCH_X86_64
     add         r10, gprsize
 %else
@@ -678,6 +684,8 @@ cglobal h264_idct_add8_8_mmx2, 5, 7, 0
     lea         r11, [scan8_mem]
 %endif
     call h264_idct_add8_mmx2_plane
+    mov          r5, 32
+    add          r2, 384
 %ifdef ARCH_X86_64
     add         r10, gprsize
 %else
@@ -810,12 +818,12 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
     test        r0, r0
     jz .try%1dc
 %ifdef ARCH_X86_64
-    mov        r0d, dword [r1+%1*8+64]
+    mov        r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
     add         r0, [r10]
 %else
     mov         r0, r0m
     mov         r0, [r0]
-    add         r0, dword [r1+%1*8+64]
+    add         r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
 %endif
     call        x264_add8x4_idct_sse2
     jmp .cycle%1end
@@ -824,16 +832,18 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
     or         r0w, word [r2+32]
     jz .cycle%1end
 %ifdef ARCH_X86_64
-    mov        r0d, dword [r1+%1*8+64]
+    mov        r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
     add         r0, [r10]
 %else
     mov         r0, r0m
     mov         r0, [r0]
-    add         r0, dword [r1+%1*8+64]
+    add         r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
 %endif
     call        h264_idct_dc_add8_mmx2
 .cycle%1end
-%if %1 < 3
+%if %1 == 1
+    add         r2, 384+64
+%elif %1 < 3
     add         r2, 64
 %endif
 %endmacro
@@ -845,15 +855,15 @@ cglobal h264_idct_add8_8_sse2, 5, 7, 8
 %ifdef ARCH_X86_64
     mov         r10, r0
 %endif
-    add8_sse2_cycle 0, 0x09
-    add8_sse2_cycle 1, 0x11
+    add8_sse2_cycle 0, 0x34
+    add8_sse2_cycle 1, 0x3c
 %ifdef ARCH_X86_64
     add         r10, gprsize
 %else
     add        r0mp, gprsize
 %endif
-    add8_sse2_cycle 2, 0x21
-    add8_sse2_cycle 3, 0x29
+    add8_sse2_cycle 2, 0x5c
+    add8_sse2_cycle 3, 0x64
     RET
 
 ;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul)
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 3f7cf4cefc..54636a95d0 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -29,14 +29,18 @@ SECTION_RODATA
 
 pw_pixel_max: times 8 dw ((1 << 10)-1)
 pd_32:        times 4 dd 32
-scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
-           db 6+1*8, 7+1*8, 6+2*8, 7+2*8
-           db 4+3*8, 5+3*8, 4+4*8, 5+4*8
-           db 6+3*8, 7+3*8, 6+4*8, 7+4*8
-           db 1+1*8, 2+1*8
-           db 1+2*8, 2+2*8
-           db 1+4*8, 2+4*8
-           db 1+5*8, 2+5*8
+scan8_mem: db  4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
+           db  6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
+           db  4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
+           db  6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
+           db  4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
+           db  6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
+           db  4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
+           db  6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
+           db  4+11*8, 5+11*8, 4+12*8, 5+12*8
+           db  6+11*8, 7+11*8, 6+12*8, 7+12*8
+           db  4+13*8, 5+13*8, 4+14*8, 5+14*8
+           db  6+13*8, 7+13*8, 6+14*8, 7+14*8
 
 %ifdef PIC
 %define scan8 r11
@@ -306,7 +310,7 @@ INIT_AVX
 IDCT_ADD16INTRA_10 avx
 %endif
 
-%assign last_block 24
+%assign last_block 36
 ;-----------------------------------------------------------------------------
 ; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
@@ -317,21 +321,22 @@ cglobal h264_idct_add8_10_%1,5,7
 %endif
     add      r2, 1024
     mov      r0, [r0]
-    ADD16_OP_INTRA %1, 16, 1+1*8
-    ADD16_OP_INTRA %1, 18, 1+2*8
+    ADD16_OP_INTRA %1, 16, 4+ 6*8
+    ADD16_OP_INTRA %1, 18, 4+ 7*8
+    add      r2, 1024-128*2
 %ifdef ARCH_X86_64
     mov      r0, [r10+gprsize]
 %else
     mov      r0, r0m
     mov      r0, [r0+gprsize]
 %endif
-    ADD16_OP_INTRA %1, 20, 1+4*8
-    ADD16_OP_INTRA %1, 22, 1+5*8
+    ADD16_OP_INTRA %1, 32, 4+11*8
+    ADD16_OP_INTRA %1, 34, 4+12*8
     REP_RET
     AC %1, 16
     AC %1, 18
-    AC %1, 20
-    AC %1, 22
+    AC %1, 32
+    AC %1, 34
 
 %endmacro ; IDCT_ADD8
 

From 7b442ad918bfbd1597f94b0a8e00c41468402236 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Thu, 9 Jun 2011 16:17:41 -0700
Subject: [PATCH 801/830] H.264: fix CODEC_FLAG_GRAY

It was broken in 4:4:4, and still did chroma deblocking for no reason in 4:2:0.
---
 libavcodec/h264.c            | 51 ++++++++++---------
 libavcodec/h264_loopfilter.c | 98 ++++++++++++++++++++----------------
 2 files changed, 84 insertions(+), 65 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 86ea218807..78ca4141a4 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -484,6 +484,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
     }
 
+    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
+
     if(chroma444){
         src_cb = pic->data[1] + offset;
         if(emu){
@@ -509,8 +511,6 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
         return;
     }
 
-    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
-
     if(MB_FIELD){
         // chroma offset when predicting from a field of opposite parity
         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
@@ -1847,24 +1847,28 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
                 for (j = 0; j < 16; j++)
                     tmp_y[j] = get_bits(&gb, bit_depth);
             }
-            for (i = 0; i < 8; i++) {
-                uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
-                for (j = 0; j < 8; j++)
-                    tmp_cb[j] = get_bits(&gb, bit_depth);
-            }
-            for (i = 0; i < 8; i++) {
-                uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
-                for (j = 0; j < 8; j++)
-                    tmp_cr[j] = get_bits(&gb, bit_depth);
+            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+                for (i = 0; i < 8; i++) {
+                    uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
+                    for (j = 0; j < 8; j++)
+                        tmp_cb[j] = get_bits(&gb, bit_depth);
+                }
+                for (i = 0; i < 8; i++) {
+                    uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
+                    for (j = 0; j < 8; j++)
+                        tmp_cr[j] = get_bits(&gb, bit_depth);
+                }
             }
         } else {
-        for (i=0; i<16; i++) {
-            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
-        }
-        for (i=0; i<8; i++) {
-            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
-            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
-        }
+            for (i=0; i<16; i++) {
+                memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
+            }
+            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+                for (i=0; i<8; i++) {
+                    memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
+                    memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
+                }
+            }
         }
     } else {
         if(IS_INTRA(mb_type)){
@@ -1954,8 +1958,9 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
     int i, j, p;
     int *block_offset = &h->block_offset[0];
     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
+    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;
 
-    for (p = 0; p < 3; p++)
+    for (p = 0; p < plane_count; p++)
     {
         dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
         s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
@@ -1996,7 +2001,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
             GetBitContext gb;
             init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);
 
-            for (p = 0; p < 3; p++) {
+            for (p = 0; p < plane_count; p++) {
                 for (i = 0; i < 16; i++) {
                     uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                     for (j = 0; j < 16; j++)
@@ -2004,7 +2009,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
                 }
             }
         } else {
-            for (p = 0; p < 3; p++) {
+            for (p = 0; p < plane_count; p++) {
                 for (i = 0; i < 16; i++) {
                     memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                 }
@@ -2015,7 +2020,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
             if(h->deblocking_filter)
                 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);
 
-            for (p = 0; p < 3; p++)
+            for (p = 0; p < plane_count; p++)
                 hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
 
             if(h->deblocking_filter)
@@ -2035,7 +2040,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
                             h->h264dsp.biweight_h264_pixels_tab, 1);
         }
 
-        for (p = 0; p < 3; p++)
+        for (p = 0; p < plane_count; p++)
             hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
     }
     if(h->cbp || IS_INTRA(mb_type))
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index d4ecefcf08..1ae534ec96 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -217,6 +217,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
     int mb_xy;
     int mb_type, left_type;
     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
+    int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
 
     mb_xy = h->mb_xy;
 
@@ -262,16 +263,18 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
             filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
         }
-        if(left_type){
-            filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
-            filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
+        if(chroma){
+            if(left_type){
+                filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
+                filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
+            }
+            filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
+            filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
+            filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+            filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
+            filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+            filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
         }
-        filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
-        filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
-        filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
-        filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
-        filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
-        filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
         return;
     } else {
         LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]);
@@ -298,7 +301,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
 #define FILTER(hv,dir,edge)\
         if(AV_RN64A(bS[dir][edge])) {                                   \
             filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\
-            if(!(edge&1)) {\
+            if(chroma && !(edge&1)) {\
                 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
                 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
             }\
@@ -353,7 +356,7 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
     return v;
 }
 
-static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma444, int dir) {
+static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma, int chroma444, int dir) {
     MpegEncContext * const s = &h->s;
     int edge;
     int chroma_qp_avg[2];
@@ -410,12 +413,14 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h );
                 chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
                 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
-                if (chroma444) {
-                    filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
-                    filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
-                } else {
-                    filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
-                    filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
+                if (chroma) {
+                    if (chroma444) {
+                        filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
+                        filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
+                    } else {
+                        filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
+                        filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
+                    }
                 }
             }
         }else{
@@ -475,7 +480,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
                 if( dir == 0 ) {
                     filter_mb_edgev( &img_y[0], linesize, bS, qp, h );
-                    {
+                    if (chroma) {
                         if (chroma444) {
                             filter_mb_edgev ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
                             filter_mb_edgev ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
@@ -486,7 +491,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                     }
                 } else {
                     filter_mb_edgeh( &img_y[0], linesize, bS, qp, h );
-                    {
+                    if (chroma) {
                         if (chroma444) {
                             filter_mb_edgeh ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
                             filter_mb_edgeh ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
@@ -557,21 +562,25 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
         //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
         if( dir == 0 ) {
             filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
-            if (chroma444) {
-                filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
-                filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
-            } else if( (edge&1) == 0 ) {
-                filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
-                filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
+            if (chroma) {
+                if (chroma444) {
+                    filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
+                    filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
+                } else if( (edge&1) == 0 ) {
+                    filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
+                    filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
+                }
             }
         } else {
             filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
-            if (chroma444) {
-                filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
-                filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
-            } else if( (edge&1) == 0 ) {
-                filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
-                filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
+            if (chroma) {
+                if (chroma444) {
+                    filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
+                    filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
+                } else if( (edge&1) == 0 ) {
+                    filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
+                    filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
+                }
             }
         }
     }
@@ -584,6 +593,7 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
     int first_vertical_edge_done = 0;
     av_unused int dir;
+    int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
 
     if (FRAME_MBAFF
             // and current and left pair do not have the same interlaced type
@@ -652,25 +662,29 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
         if(MB_FIELD){
             filter_mb_mbaff_edgev ( h, img_y                ,   linesize, bS  , 1, qp [0] );
             filter_mb_mbaff_edgev ( h, img_y  + 8*  linesize,   linesize, bS+4, 1, qp [1] );
-            filter_mb_mbaff_edgecv( h, img_cb,                uvlinesize, bS  , 1, bqp[0] );
-            filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
-            filter_mb_mbaff_edgecv( h, img_cr,                uvlinesize, bS  , 1, rqp[0] );
-            filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
+            if (chroma){
+                filter_mb_mbaff_edgecv( h, img_cb,                uvlinesize, bS  , 1, bqp[0] );
+                filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
+                filter_mb_mbaff_edgecv( h, img_cr,                uvlinesize, bS  , 1, rqp[0] );
+                filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
+            }
         }else{
             filter_mb_mbaff_edgev ( h, img_y              , 2*  linesize, bS  , 2, qp [0] );
             filter_mb_mbaff_edgev ( h, img_y  +   linesize, 2*  linesize, bS+1, 2, qp [1] );
-            filter_mb_mbaff_edgecv( h, img_cb,              2*uvlinesize, bS  , 2, bqp[0] );
-            filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
-            filter_mb_mbaff_edgecv( h, img_cr,              2*uvlinesize, bS  , 2, rqp[0] );
-            filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
+            if (chroma){
+                filter_mb_mbaff_edgecv( h, img_cb,              2*uvlinesize, bS  , 2, bqp[0] );
+                filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
+                filter_mb_mbaff_edgecv( h, img_cr,              2*uvlinesize, bS  , 2, rqp[0] );
+                filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
+            }
         }
     }
 
 #if CONFIG_SMALL
     for( dir = 0; dir < 2; dir++ )
-        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, CHROMA444, dir);
+        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, chroma, CHROMA444, dir);
 #else
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, CHROMA444, 0);
-    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, CHROMA444, 1);
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, chroma, CHROMA444, 0);
+    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, chroma, CHROMA444, 1);
 #endif
 }

From 11177a4d82da8f4987bf6dc755461be6a4e750c5 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Mon, 13 Jun 2011 10:21:46 -0700
Subject: [PATCH 802/830] Fix SVQ3 after adding 4:4:4 H.264 support

---
 libavcodec/svq3.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 7cde5e5552..23ab209312 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -633,8 +633,9 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
         memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy], DC_PRED, 8);
     }
     if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
-        memset(h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t));
-        s->dsp.clear_blocks(h->mb);
+        memset(h->non_zero_count_cache + 8, 0, 14*8*sizeof(uint8_t));
+        s->dsp.clear_blocks(h->mb+  0);
+        s->dsp.clear_blocks(h->mb+384);
     }
 
     if (!IS_INTRA16x16(mb_type) && (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
@@ -654,8 +655,8 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
         }
     }
     if (IS_INTRA16x16(mb_type)) {
-        AV_ZERO128(h->mb_luma_dc+0);
-        AV_ZERO128(h->mb_luma_dc+8);
+        AV_ZERO128(h->mb_luma_dc[0]+0);
+        AV_ZERO128(h->mb_luma_dc[0]+8);
         if (svq3_decode_block(&s->gb, h->mb_luma_dc, 0, 1)){
             av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding intra luma dc\n");
             return -1;
@@ -681,20 +682,23 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
         }
 
         if ((cbp & 0x30)) {
-            for (i = 0; i < 2; ++i) {
-              if (svq3_decode_block(&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)){
+            for (i = 1; i < 3; ++i) {
+              if (svq3_decode_block(&s->gb, &h->mb[16*16*i], 0, 3)){
                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma dc block\n");
                 return -1;
               }
             }
 
             if ((cbp & 0x20)) {
-                for (i = 0; i < 8; i++) {
-                    h->non_zero_count_cache[ scan8[16+i] ] = 1;
+                for (i = 1; i < 3; i++) {
+                    for (j = 0; j < 4; j++) {
+                        k = 16*i + j;
+                        h->non_zero_count_cache[ scan8[k] ] = 1;
 
-                    if (svq3_decode_block(&s->gb, &h->mb[16*(16 + i)], 1, 1)){
-                        av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
-                        return -1;
+                        if (svq3_decode_block(&s->gb, &h->mb[16*k], 1, 1)){
+                            av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
+                            return -1;
+                        }
                     }
                 }
             }

From 7d2714d1abb432575e667870e0248e2ee77ed9a4 Mon Sep 17 00:00:00 2001
From: Philip Langdale <philipl@overt.org>
Date: Sun, 12 Jun 2011 20:21:28 -0700
Subject: [PATCH 803/830] CrystalHD: Keep mp4toannexb filter around for entire
 decoder lifetime.

In preparation for using the filter on the actual bitstream, we need
to extend its lifetime to match that of the decoder.

Signed-off-by: Philip Langdale <philipl@overt.org>
---
 libavcodec/crystalhd.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c
index 3c8021748f..851abd7ddd 100644
--- a/libavcodec/crystalhd.c
+++ b/libavcodec/crystalhd.c
@@ -124,6 +124,7 @@ typedef struct {
     AVFrame pic;
     HANDLE dev;
 
+    AVBitStreamFilterContext *bsfc;
     AVCodecParserContext *parser;
 
     uint8_t is_70012;
@@ -338,6 +339,9 @@ static av_cold int uninit(AVCodecContext *avctx)
     DtsDeviceClose(device);
 
     av_parser_close(priv->parser);
+    if (priv->bsfc) {
+        av_bitstream_filter_close(priv->bsfc);
+    }
 
     av_free(priv->sps_pps_buf);
 
@@ -397,7 +401,6 @@ static av_cold int init(AVCodecContext *avctx)
         {
             uint8_t *dummy_p;
             int dummy_int;
-            AVBitStreamFilterContext *bsfc;
 
             uint32_t orig_data_size = avctx->extradata_size;
             uint8_t *orig_data = av_malloc(orig_data_size);
@@ -409,16 +412,15 @@ static av_cold int init(AVCodecContext *avctx)
             memcpy(orig_data, avctx->extradata, orig_data_size);
 
 
-            bsfc = av_bitstream_filter_init("h264_mp4toannexb");
-            if (!bsfc) {
+            priv->bsfc = av_bitstream_filter_init("h264_mp4toannexb");
+            if (!priv->bsfc) {
                 av_log(avctx, AV_LOG_ERROR,
                        "Cannot open the h264_mp4toannexb BSF!\n");
                 av_free(orig_data);
                 return AVERROR_BSF_NOT_FOUND;
             }
-            av_bitstream_filter_filter(bsfc, avctx, NULL, &dummy_p,
+            av_bitstream_filter_filter(priv->bsfc, avctx, NULL, &dummy_p,
                                        &dummy_int, NULL, 0, 0);
-            av_bitstream_filter_close(bsfc);
 
             priv->sps_pps_buf     = avctx->extradata;
             priv->sps_pps_size    = avctx->extradata_size;

From 4ac5dffc5a4daf315fb908ec689366e179a59ad3 Mon Sep 17 00:00:00 2001
From: Philip Langdale <philipl@overt.org>
Date: Sun, 12 Jun 2011 20:22:20 -0700
Subject: [PATCH 804/830] CrystalHD: Use mp4toannexb bitstream filter.

The H.264 parser that we use to detect interlacing can only handle
an Annex B stream, so we need to actually use the filter. This is
unfortunate as the crystalhd library is already doing this conversion
internally. A future change will reorganise the decode path more
completely so that we can feed the converted stream into libcrystalhd
and avoid the second conversion.

Signed-off-by: Philip Langdale <philipl@overt.org>
---
 libavcodec/crystalhd.c | 48 ++++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c
index 851abd7ddd..ce1cd55228 100644
--- a/libavcodec/crystalhd.c
+++ b/libavcodec/crystalhd.c
@@ -514,6 +514,7 @@ static av_cold int init(AVCodecContext *avctx)
             av_log(avctx, AV_LOG_WARNING,
                    "Cannot open the h.264 parser! Interlaced h.264 content "
                    "will not be detected reliably.\n");
+        priv->parser->flags = PARSER_FLAG_COMPLETE_FRAMES;
     }
     av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Init complete.\n");
 
@@ -833,24 +834,49 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size, AVPacket *a
         int32_t tx_free = (int32_t)DtsTxFreeSize(dev);
 
         if (priv->parser) {
-            uint8_t *pout;
-            int psize;
-            const uint8_t *in_data = avpkt->data;
+            uint8_t *in_data = avpkt->data;
             int in_len = len;
-            H264Context *h = priv->parser->priv_data;
+            int ret = 0;
 
-            while (in_len) {
+            if (priv->bsfc) {
+                ret = av_bitstream_filter_filter(priv->bsfc, avctx, NULL,
+                                                 &in_data, &in_len,
+                                                 avpkt->data, len, 0);
+            }
+
+            if (ret >= 0) {
+                uint8_t *pout;
+                int psize;
                 int index;
+                H264Context *h = priv->parser->priv_data;
+
                 index = av_parser_parse2(priv->parser, avctx, &pout, &psize,
                                          in_data, in_len, avctx->pkt->pts,
                                          avctx->pkt->dts, 0);
-                in_data += index;
-                in_len -= index;
+                if (index < 0) {
+                    av_log(avctx, AV_LOG_WARNING,
+                           "CrystalHD: Failed to parse h.264 packet to "
+                           "detect interlacing.\n");
+                } else if (index != in_len) {
+                    av_log(avctx, AV_LOG_WARNING,
+                           "CrystalHD: Failed to parse h.264 packet "
+                           "completely. Interlaced frames may be "
+                           "incorrectly detected\n.");
+                } else {
+                    av_log(avctx, AV_LOG_VERBOSE,
+                           "CrystalHD: parser picture type %d\n",
+                           h->s.picture_structure);
+                    pic_type = h->s.picture_structure;
+                }
+            } else {
+                av_log(avctx, AV_LOG_WARNING,
+                       "CrystalHD: mp4toannexb filter failed to filter "
+                       "packet. Interlaced frames may be incorrectly "
+                       "detected.\n");
+            }
+            if (ret > 0) {
+                av_freep(&in_data);
             }
-            av_log(avctx, AV_LOG_VERBOSE,
-                   "CrystalHD: parser picture type %d\n",
-                   h->s.picture_structure);
-            pic_type = h->s.picture_structure;
         }
 
         if (len < tx_free - 1024) {

From e897a633cded0a8f283114e22766790f48ae8fa7 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 13 Jun 2011 00:33:19 +0100
Subject: [PATCH 805/830] ARM: factor some repetitive code into macros

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/mpegvideo_armv5te_s.S |  61 ++++-----
 libavcodec/arm/simple_idct_armv5te.S | 182 +++++++--------------------
 2 files changed, 69 insertions(+), 174 deletions(-)

diff --git a/libavcodec/arm/mpegvideo_armv5te_s.S b/libavcodec/arm/mpegvideo_armv5te_s.S
index e83fe991e6..e3461601d5 100644
--- a/libavcodec/arm/mpegvideo_armv5te_s.S
+++ b/libavcodec/arm/mpegvideo_armv5te_s.S
@@ -35,6 +35,21 @@
  *
  * Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
  */
+
+.macro  dequant_t       dst, src, mul, add, tmp
+        rsbs            \tmp, ip, \src, asr #16
+        addgt           \tmp, \add, #0
+        rsblt           \tmp, \add, #0
+        smlatbne        \dst, \src, \mul, \tmp
+.endm
+
+.macro  dequant_b       dst, src, mul, add, tmp
+        rsbs            \tmp, ip, \src, lsl #16
+        addgt           \tmp, \add, #0
+        rsblt           \tmp, \add, #0
+        smlabbne        \dst, \src, \mul, \tmp
+.endm
+
 function ff_dct_unquantize_h263_armv5te, export=1
         push            {r4-r9,lr}
         mov             ip, #0
@@ -44,50 +59,20 @@ function ff_dct_unquantize_h263_armv5te, export=1
 1:
         ldrd            r6, [r0, #8]
 
-        rsbs            r9, ip, r4, asr #16
-        addgt           r9, r2, #0
-        rsblt           r9, r2, #0
-        smlatbne        r9, r4, r1, r9
-
-        rsbs            lr, ip, r5, asr #16
-        addgt           lr, r2, #0
-        rsblt           lr, r2, #0
-        smlatbne        lr, r5, r1, lr
-
-        rsbs            r8, ip, r4, asl #16
-        addgt           r8, r2, #0
-        rsblt           r8, r2, #0
-        smlabbne        r4, r4, r1, r8
-
-        rsbs            r8, ip, r5, asl #16
-        addgt           r8, r2, #0
-        rsblt           r8, r2, #0
-        smlabbne        r5, r5, r1, r8
+        dequant_t       r9, r4, r1, r2, r9
+        dequant_t       lr, r5, r1, r2, lr
+        dequant_b       r4, r4, r1, r2, r8
+        dequant_b       r5, r5, r1, r2, r8
 
         strh            r4, [r0], #2
         strh            r9, [r0], #2
         strh            r5, [r0], #2
         strh            lr, [r0], #2
 
-        rsbs            r9, ip, r6, asr #16
-        addgt           r9, r2, #0
-        rsblt           r9, r2, #0
-        smlatbne        r9, r6, r1, r9
-
-        rsbs            lr, ip, r7, asr #16
-        addgt           lr, r2, #0
-        rsblt           lr, r2, #0
-        smlatbne        lr, r7, r1, lr
-
-        rsbs            r8, ip, r6, asl #16
-        addgt           r8, r2, #0
-        rsblt           r8, r2, #0
-        smlabbne        r6, r6, r1, r8
-
-        rsbs            r8, ip, r7, asl #16
-        addgt           r8, r2, #0
-        rsblt           r8, r2, #0
-        smlabbne        r7, r7, r1, r8
+        dequant_t       r9, r6, r1, r2, r9
+        dequant_t       lr, r7, r1, r2, lr
+        dequant_b       r6, r6, r1, r2, r8
+        dequant_b       r7, r7, r1, r2, r8
 
         strh            r6, [r0], #2
         strh            r9, [r0], #2
diff --git a/libavcodec/arm/simple_idct_armv5te.S b/libavcodec/arm/simple_idct_armv5te.S
index 5d03c0c1c3..29ebf5c03c 100644
--- a/libavcodec/arm/simple_idct_armv5te.S
+++ b/libavcodec/arm/simple_idct_armv5te.S
@@ -333,6 +333,20 @@ function idct_col_armv5te
         ldr    pc, [sp], #4
 endfunc
 
+.macro  clip   dst, src:vararg
+        movs   \dst, \src
+        movmi  \dst, #0
+        cmp    \dst, #255
+        movgt  \dst, #255
+.endm
+
+.macro  aclip  dst, src:vararg
+        adds   \dst, \src
+        movmi  \dst, #0
+        cmp    \dst, #255
+        movgt  \dst, #255
+.endm
+
 function idct_col_put_armv5te
         str    lr, [sp, #-4]!
 
@@ -341,27 +355,15 @@ function idct_col_put_armv5te
         ldmfd  sp!, {a3, a4}
         ldr    lr, [sp, #32]
         add    a2, a3, v1
-        movs   a2, a2, asr #20
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        clip   a2, a2, asr #20
         add    ip, a4, v2
-        movs   ip, ip, asr #20
-        movmi  ip, #0
-        cmp    ip, #255
-        movgt  ip, #255
+        clip   ip, ip, asr #20
         orr    a2, a2, ip, lsl #8
         sub    a3, a3, v1
-        movs   a3, a3, asr #20
-        movmi  a3, #0
-        cmp    a3, #255
-        movgt  a3, #255
+        clip   a3, a3, asr #20
         sub    a4, a4, v2
-        movs   a4, a4, asr #20
-        movmi  a4, #0
-        cmp    a4, #255
+        clip   a4, a4, asr #20
         ldr    v1, [sp, #28]
-        movgt  a4, #255
         strh   a2, [v1]
         add    a2, v1, #2
         str    a2, [sp, #28]
@@ -371,79 +373,43 @@ function idct_col_put_armv5te
         strh   a2, [v2, v1]!
 
         sub    a2, a3, v3
-        movs   a2, a2, asr #20
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        clip   a2, a2, asr #20
         sub    ip, a4, v4
-        movs   ip, ip, asr #20
-        movmi  ip, #0
-        cmp    ip, #255
-        movgt  ip, #255
+        clip   ip, ip, asr #20
         orr    a2, a2, ip, lsl #8
         strh   a2, [v1, lr]!
         add    a3, a3, v3
-        movs   a2, a3, asr #20
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        clip   a2, a3, asr #20
         add    a4, a4, v4
-        movs   a4, a4, asr #20
-        movmi  a4, #0
-        cmp    a4, #255
-        movgt  a4, #255
+        clip   a4, a4, asr #20
         orr    a2, a2, a4, lsl #8
         ldmfd  sp!, {a3, a4}
         strh   a2, [v2, -lr]!
 
         add    a2, a3, v5
-        movs   a2, a2, asr #20
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        clip   a2, a2, asr #20
         add    ip, a4, v6
-        movs   ip, ip, asr #20
-        movmi  ip, #0
-        cmp    ip, #255
-        movgt  ip, #255
+        clip   ip, ip, asr #20
         orr    a2, a2, ip, lsl #8
         strh   a2, [v1, lr]!
         sub    a3, a3, v5
-        movs   a2, a3, asr #20
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        clip   a2, a3, asr #20
         sub    a4, a4, v6
-        movs   a4, a4, asr #20
-        movmi  a4, #0
-        cmp    a4, #255
-        movgt  a4, #255
+        clip   a4, a4, asr #20
         orr    a2, a2, a4, lsl #8
         ldmfd  sp!, {a3, a4}
         strh   a2, [v2, -lr]!
 
         add    a2, a3, v7
-        movs   a2, a2, asr #20
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        clip   a2, a2, asr #20
         add    ip, a4, fp
-        movs   ip, ip, asr #20
-        movmi  ip, #0
-        cmp    ip, #255
-        movgt  ip, #255
+        clip   ip, ip, asr #20
         orr    a2, a2, ip, lsl #8
         strh   a2, [v1, lr]
         sub    a3, a3, v7
-        movs   a2, a3, asr #20
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        clip   a2, a3, asr #20
         sub    a4, a4, fp
-        movs   a4, a4, asr #20
-        movmi  a4, #0
-        cmp    a4, #255
-        movgt  a4, #255
+        clip   a4, a4, asr #20
         orr    a2, a2, a4, lsl #8
         strh   a2, [v2, -lr]
 
@@ -460,36 +426,22 @@ function idct_col_add_armv5te
         ldmfd  sp!, {a3, a4}
         ldrh   ip, [lr]
         add    a2, a3, v1
-        mov    a2, a2, asr #20
         sub    a3, a3, v1
         and    v1, ip, #255
-        adds   a2, a2, v1
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        aclip  a2, v1, a2, asr #20
         add    v1, a4, v2
         mov    v1, v1, asr #20
-        adds   v1, v1, ip, lsr #8
-        movmi  v1, #0
-        cmp    v1, #255
-        movgt  v1, #255
+        aclip  v1, v1, ip, lsr #8
         orr    a2, a2, v1, lsl #8
         ldr    v1, [sp, #32]
         sub    a4, a4, v2
         rsb    v2, v1, v1, lsl #3
         ldrh   ip, [v2, lr]!
         strh   a2, [lr]
-        mov    a3, a3, asr #20
         and    a2, ip, #255
-        adds   a3, a3, a2
-        movmi  a3, #0
-        cmp    a3, #255
-        movgt  a3, #255
+        aclip  a3, a2, a3, asr #20
         mov    a4, a4, asr #20
-        adds   a4, a4, ip, lsr #8
-        movmi  a4, #0
-        cmp    a4, #255
-        movgt  a4, #255
+        aclip  a4, a4, ip, lsr #8
         add    a2, lr, #2
         str    a2, [sp, #28]
         orr    a2, a3, a4, lsl #8
@@ -498,102 +450,60 @@ function idct_col_add_armv5te
         ldmfd  sp!, {a3, a4}
         ldrh   ip, [lr, v1]!
         sub    a2, a3, v3
-        mov    a2, a2, asr #20
         add    a3, a3, v3
         and    v3, ip, #255
-        adds   a2, a2, v3
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        aclip  a2, v3, a2, asr #20
         sub    v3, a4, v4
         mov    v3, v3, asr #20
-        adds   v3, v3, ip, lsr #8
-        movmi  v3, #0
-        cmp    v3, #255
-        movgt  v3, #255
+        aclip  v3, v3, ip, lsr #8
         orr    a2, a2, v3, lsl #8
         add    a4, a4, v4
         ldrh   ip, [v2, -v1]!
         strh   a2, [lr]
-        mov    a3, a3, asr #20
         and    a2, ip, #255
-        adds   a3, a3, a2
-        movmi  a3, #0
-        cmp    a3, #255
-        movgt  a3, #255
+        aclip  a3, a2, a3, asr #20
         mov    a4, a4, asr #20
-        adds   a4, a4, ip, lsr #8
-        movmi  a4, #0
-        cmp    a4, #255
-        movgt  a4, #255
+        aclip  a4, a4, ip, lsr #8
         orr    a2, a3, a4, lsl #8
         strh   a2, [v2]
 
         ldmfd  sp!, {a3, a4}
         ldrh   ip, [lr, v1]!
         add    a2, a3, v5
-        mov    a2, a2, asr #20
         sub    a3, a3, v5
         and    v3, ip, #255
-        adds   a2, a2, v3
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        aclip  a2, v3, a2, asr #20
         add    v3, a4, v6
         mov    v3, v3, asr #20
-        adds   v3, v3, ip, lsr #8
-        movmi  v3, #0
-        cmp    v3, #255
-        movgt  v3, #255
+        aclip  v3, v3, ip, lsr #8
         orr    a2, a2, v3, lsl #8
         sub    a4, a4, v6
         ldrh   ip, [v2, -v1]!
         strh   a2, [lr]
-        mov    a3, a3, asr #20
         and    a2, ip, #255
-        adds   a3, a3, a2
-        movmi  a3, #0
-        cmp    a3, #255
-        movgt  a3, #255
+        aclip  a3, a2, a3, asr #20
         mov    a4, a4, asr #20
-        adds   a4, a4, ip, lsr #8
-        movmi  a4, #0
-        cmp    a4, #255
-        movgt  a4, #255
+        aclip  a4, a4, ip, lsr #8
         orr    a2, a3, a4, lsl #8
         strh   a2, [v2]
 
         ldmfd  sp!, {a3, a4}
         ldrh   ip, [lr, v1]!
         add    a2, a3, v7
-        mov    a2, a2, asr #20
         sub    a3, a3, v7
         and    v3, ip, #255
-        adds   a2, a2, v3
-        movmi  a2, #0
-        cmp    a2, #255
-        movgt  a2, #255
+        aclip  a2, v3, a2, asr #20
         add    v3, a4, fp
         mov    v3, v3, asr #20
-        adds   v3, v3, ip, lsr #8
-        movmi  v3, #0
-        cmp    v3, #255
-        movgt  v3, #255
+        aclip  v3, v3, ip, lsr #8
         orr    a2, a2, v3, lsl #8
         sub    a4, a4, fp
         ldrh   ip, [v2, -v1]!
         strh   a2, [lr]
-        mov    a3, a3, asr #20
         and    a2, ip, #255
-        adds   a3, a3, a2
-        movmi  a3, #0
-        cmp    a3, #255
-        movgt  a3, #255
+        aclip  a3, a2, a3, asr #20
         mov    a4, a4, asr #20
-        adds   a4, a4, ip, lsr #8
-        movmi  a4, #0
-        cmp    a4, #255
-        movgt  a4, #255
+        aclip  a4, a4, ip, lsr #8
         orr    a2, a3, a4, lsl #8
         strh   a2, [v2]
 

From cf53c48615658a6019ffb7e8453913bbfd38cb27 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 5 Jun 2011 22:18:06 +0100
Subject: [PATCH 806/830] build: move ALLFFLIBS to a more logical place

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile   | 2 ++
 common.mak | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 0df8a11407..ca07d9fe6d 100644
--- a/Makefile
+++ b/Makefile
@@ -22,6 +22,8 @@ BASENAMES   = ffmpeg ffplay ffprobe ffserver
 ALLPROGS    = $(BASENAMES:%=%$(EXESUF))
 ALLMANPAGES = $(BASENAMES:%=%.1)
 
+ALLFFLIBS = avcodec avdevice avfilter avformat avutil postproc swscale
+
 FFLIBS-$(CONFIG_AVDEVICE) += avdevice
 FFLIBS-$(CONFIG_AVFILTER) += avfilter
 FFLIBS-$(CONFIG_AVFORMAT) += avformat
diff --git a/common.mak b/common.mak
index b5ccadbe6e..324868762a 100644
--- a/common.mak
+++ b/common.mak
@@ -25,8 +25,6 @@ $(foreach VAR,$(SILENT),$(eval override $(VAR) = @$($(VAR))))
 $(eval INSTALL = @$(call ECHO,INSTALL,$$(^:$(SRC_DIR)/%=%)); $(INSTALL))
 endif
 
-ALLFFLIBS = avcodec avdevice avfilter avformat avutil postproc swscale
-
 IFLAGS   := -I. -I$(SRC_PATH)
 CPPFLAGS := $(IFLAGS) $(CPPFLAGS)
 CFLAGS   += $(ECFLAGS)

From 0af8a71d66305874bd6f0ebc84ebf99339b6a5d3 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 12 Jun 2011 21:53:22 -0400
Subject: [PATCH 807/830] swscale: fix JPEG-range YUV scaling artifacts.

YUV planes were marked as uint16_t, but they contained signed data.
Fixes issues 1108 and 675.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libswscale/swscale.c             | 10 +++++-----
 libswscale/swscale_internal.h    |  4 ++--
 tests/ref/lavf/pixfmt            |  4 ++--
 tests/ref/lavfi/pixdesc_be       |  4 ++--
 tests/ref/lavfi/pixdesc_le       |  4 ++--
 tests/ref/lavfi/pixfmts_copy_le  |  4 ++--
 tests/ref/lavfi/pixfmts_hflip_le |  4 ++--
 tests/ref/lavfi/pixfmts_null_le  |  4 ++--
 tests/ref/lavfi/pixfmts_pad_le   |  4 ++--
 tests/ref/lavfi/pixfmts_scale_le |  4 ++--
 tests/ref/lavfi/pixfmts_vflip_le |  4 ++--
 11 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index ba89a0f4be..a09dba037e 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1701,7 +1701,7 @@ static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
 
 //FIXME all pal and rgb srcFormats could do this convertion as well
 //FIXME all scalers more complex than bilinear could do half of this transform
-static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
+static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -1709,7 +1709,7 @@ static void chrRangeToJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
         dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
     }
 }
-static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
+static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -1717,13 +1717,13 @@ static void chrRangeFromJpeg_c(uint16_t *dstU, uint16_t *dstV, int width)
         dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
     }
 }
-static void lumRangeToJpeg_c(uint16_t *dst, int width)
+static void lumRangeToJpeg_c(int16_t *dst, int width)
 {
     int i;
     for (i = 0; i < width; i++)
         dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
 }
-static void lumRangeFromJpeg_c(uint16_t *dst, int width)
+static void lumRangeFromJpeg_c(int16_t *dst, int width)
 {
     int i;
     for (i = 0; i < width; i++)
@@ -1752,7 +1752,7 @@ static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
                                      uint32_t *pal, int isAlpha)
 {
     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
-    void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
+    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
 
     if (toYV12) {
         toYV12(formatConvBuffer, src, srcW, pal);
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 483842e866..ea34d8ce0e 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -310,8 +310,8 @@ typedef struct SwsContext {
                    int xInc, const int16_t *filter, const int16_t *filterPos,
                    int filterSize);
 
-    void (*lumConvertRange)(uint16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
-    void (*chrConvertRange)(uint16_t *dst1, uint16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.
+    void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
+    void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.
 
     int needs_hcscale; ///< Set if there are chroma planes to be converted.
 
diff --git a/tests/ref/lavf/pixfmt b/tests/ref/lavf/pixfmt
index d03abffa8e..186dde5ed3 100644
--- a/tests/ref/lavf/pixfmt
+++ b/tests/ref/lavf/pixfmt
@@ -10,9 +10,9 @@ ac68f9fdd9d55efd0306d9b004038761 *./tests/data/pixfmt/yuyv422.yuv
 304128 ./tests/data/pixfmt/yuv410p.yuv
 8594ea0b8d7c2c964525b0801b5351de *./tests/data/pixfmt/yuv411p.yuv
 304128 ./tests/data/pixfmt/yuv411p.yuv
-66673539adf8cda28e3b76068d3aae61 *./tests/data/pixfmt/yuvj420p.yuv
+e176bd14185788110e055f945de7f95f *./tests/data/pixfmt/yuvj420p.yuv
 304128 ./tests/data/pixfmt/yuvj420p.yuv
-572bf387dd1e3f073cbfd082e055ca81 *./tests/data/pixfmt/yuvj422p.yuv
+472028e46a81c98d9b2477507def4723 *./tests/data/pixfmt/yuvj422p.yuv
 304128 ./tests/data/pixfmt/yuvj422p.yuv
 c10442da177c9f1d12be3c53be6fa12c *./tests/data/pixfmt/yuvj444p.yuv
 304128 ./tests/data/pixfmt/yuvj444p.yuv
diff --git a/tests/ref/lavfi/pixdesc_be b/tests/ref/lavfi/pixdesc_be
index aa20a325d4..830fd2a793 100644
--- a/tests/ref/lavfi/pixdesc_be
+++ b/tests/ref/lavfi/pixdesc_be
@@ -43,6 +43,6 @@ yuv444p16le         1262a0dc57ee147967fc896d04206313
 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
 yuvj420p            32eec78ba51857b16ce9b813a49b7189
 yuvj422p            0dfa0ed434f73be51428758c69e082cb
-yuvj440p            9c3a093ff64a83ac4cf0b1e65390e236
-yuvj444p            ede1e5882d5c5bba48ea33cf1209d231
+yuvj440p            657501a28004e27a592757a7509f5189
+yuvj444p            98d3d054f2ec09a75eeed5d328dc75b7
 yuyv422             f2569f2b5069a0ee0cecae33de0455e3
diff --git a/tests/ref/lavfi/pixdesc_le b/tests/ref/lavfi/pixdesc_le
index 2078ae1abc..b5afb92ffd 100644
--- a/tests/ref/lavfi/pixdesc_le
+++ b/tests/ref/lavfi/pixdesc_le
@@ -43,6 +43,6 @@ yuv444p16le         1262a0dc57ee147967fc896d04206313
 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
 yuvj420p            32eec78ba51857b16ce9b813a49b7189
 yuvj422p            0dfa0ed434f73be51428758c69e082cb
-yuvj440p            9c3a093ff64a83ac4cf0b1e65390e236
-yuvj444p            ede1e5882d5c5bba48ea33cf1209d231
+yuvj440p            657501a28004e27a592757a7509f5189
+yuvj444p            98d3d054f2ec09a75eeed5d328dc75b7
 yuyv422             f2569f2b5069a0ee0cecae33de0455e3
diff --git a/tests/ref/lavfi/pixfmts_copy_le b/tests/ref/lavfi/pixfmts_copy_le
index 2078ae1abc..b5afb92ffd 100644
--- a/tests/ref/lavfi/pixfmts_copy_le
+++ b/tests/ref/lavfi/pixfmts_copy_le
@@ -43,6 +43,6 @@ yuv444p16le         1262a0dc57ee147967fc896d04206313
 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
 yuvj420p            32eec78ba51857b16ce9b813a49b7189
 yuvj422p            0dfa0ed434f73be51428758c69e082cb
-yuvj440p            9c3a093ff64a83ac4cf0b1e65390e236
-yuvj444p            ede1e5882d5c5bba48ea33cf1209d231
+yuvj440p            657501a28004e27a592757a7509f5189
+yuvj444p            98d3d054f2ec09a75eeed5d328dc75b7
 yuyv422             f2569f2b5069a0ee0cecae33de0455e3
diff --git a/tests/ref/lavfi/pixfmts_hflip_le b/tests/ref/lavfi/pixfmts_hflip_le
index c30215e1de..514eed7b3b 100644
--- a/tests/ref/lavfi/pixfmts_hflip_le
+++ b/tests/ref/lavfi/pixfmts_hflip_le
@@ -34,5 +34,5 @@ yuv444p16le         70793e3d66d0c23a0cdedabe9c24c2a7
 yuva420p            d83ec0c01498189f179ec574918185f1
 yuvj420p            df3aaaec3bb157c3bde5f0365af30f4f
 yuvj422p            d113871528d510a192797af59df9c05c
-yuvj440p            e8f7ed76e57c892a1e9e27a3f29452db
-yuvj444p            2c3ae369607608c6dcb0d830f00f971a
+yuvj440p            07f5ff12ced85aba1b5cf51692fff4bb
+yuvj444p            8d95f6b4d4c9b4b0389d36df686bfa46
diff --git a/tests/ref/lavfi/pixfmts_null_le b/tests/ref/lavfi/pixfmts_null_le
index 2078ae1abc..b5afb92ffd 100644
--- a/tests/ref/lavfi/pixfmts_null_le
+++ b/tests/ref/lavfi/pixfmts_null_le
@@ -43,6 +43,6 @@ yuv444p16le         1262a0dc57ee147967fc896d04206313
 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
 yuvj420p            32eec78ba51857b16ce9b813a49b7189
 yuvj422p            0dfa0ed434f73be51428758c69e082cb
-yuvj440p            9c3a093ff64a83ac4cf0b1e65390e236
-yuvj444p            ede1e5882d5c5bba48ea33cf1209d231
+yuvj440p            657501a28004e27a592757a7509f5189
+yuvj444p            98d3d054f2ec09a75eeed5d328dc75b7
 yuyv422             f2569f2b5069a0ee0cecae33de0455e3
diff --git a/tests/ref/lavfi/pixfmts_pad_le b/tests/ref/lavfi/pixfmts_pad_le
index 73a44f627c..03db5a7efd 100644
--- a/tests/ref/lavfi/pixfmts_pad_le
+++ b/tests/ref/lavfi/pixfmts_pad_le
@@ -13,5 +13,5 @@ yuv444p             45484f0411d336ce94636da0395f4692
 yuva420p            919722724765dc3a716c38fa53b20580
 yuvj420p            4f20e2799966c21a9d9e0788b0956925
 yuvj422p            e4d84b0683f77a76f1c17d976eff127c
-yuvj440p            9ccc1f03d8d9a00dc4a5888bd77093ad
-yuvj444p            7d202babcedf836e9ae9776163ee9425
+yuvj440p            33511c43339aa32533ab832861c150c3
+yuvj444p            82f0badd9d0c062bbfa0d9d73d7240a3
diff --git a/tests/ref/lavfi/pixfmts_scale_le b/tests/ref/lavfi/pixfmts_scale_le
index 37dce4f86c..275dce8516 100644
--- a/tests/ref/lavfi/pixfmts_scale_le
+++ b/tests/ref/lavfi/pixfmts_scale_le
@@ -43,6 +43,6 @@ yuv444p16le         385d0cc5240d62da0871915be5d86f0a
 yuva420p            8673a9131fb47de69788863f93a50eb7
 yuvj420p            30427bd6caf5bda93a173dbebe759e09
 yuvj422p            fc8288f64fd149573f73cf8da05d8e6d
-yuvj440p            26d0b4713a87ab9637a4062c22e6e70d
-yuvj444p            894e6184d987a5ec4dc6f77bb75ef38c
+yuvj440p            508ac7a9ddeb6d1794a1100ba7a1664c
+yuvj444p            73aebe144085b22d1189caf6ca07e18c
 yuyv422             169e19ac91b257bd84ace0fdf56559ad
diff --git a/tests/ref/lavfi/pixfmts_vflip_le b/tests/ref/lavfi/pixfmts_vflip_le
index 698921a2a2..5100c42412 100644
--- a/tests/ref/lavfi/pixfmts_vflip_le
+++ b/tests/ref/lavfi/pixfmts_vflip_le
@@ -43,6 +43,6 @@ yuv444p16le         8f31557bc52adfe00ae8b40a9b8c23f8
 yuva420p            c705d1cf061d8c6580ac690b55f92276
 yuvj420p            41fd02b204da0ab62452cd14b595e2e4
 yuvj422p            7f6ca9bc1812cde02036d7d29a7cce43
-yuvj440p            40591908cca457f51dee30a86c3e8ffd
-yuvj444p            77e5e095881c52a04fd9f5abd1d7b5ac
+yuvj440p            25711c3c0fd15ec19c59a10784fcfb96
+yuvj444p            e45dee2ac02276dfab92e8ebfbe52e00
 yuyv422             e944ff7316cd03c42c091717ce74f602

From d530e5794475887d5d2b1f05dfd60ba4542b0f82 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 5 Jun 2011 22:00:54 +0100
Subject: [PATCH 808/830] build: move vpath directives to main Makefile

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile   | 5 +++++
 common.mak | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index ca07d9fe6d..084e175b90 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,11 @@ include config.mak
 
 SRC_DIR = $(SRC_PATH_BARE)
 
+vpath %.c   $(SRC_DIR)
+vpath %.h   $(SRC_DIR)
+vpath %.S   $(SRC_DIR)
+vpath %.asm $(SRC_DIR)
+vpath %.v   $(SRC_DIR)
 vpath %.texi $(SRC_PATH_BARE)
 
 PROGS-$(CONFIG_FFMPEG)   += ffmpeg
diff --git a/common.mak b/common.mak
index 324868762a..e9e69f263b 100644
--- a/common.mak
+++ b/common.mak
@@ -6,11 +6,6 @@
 all: all-yes
 
 ifndef SUBDIR
-vpath %.c   $(SRC_DIR)
-vpath %.h   $(SRC_DIR)
-vpath %.S   $(SRC_DIR)
-vpath %.asm $(SRC_DIR)
-vpath %.v   $(SRC_DIR)
 
 ifndef V
 Q      = @

From 9f6321117565f9e673c8c6dbfb4104c09272704e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 14 Jun 2011 15:02:24 -0400
Subject: [PATCH 809/830] swscale: split out RGB48 output functions from
 yuv2packed[12X]_c().

This is part of the Great Evil Plan to simplify swscale. Note that
you'll see some code duplication between the output functions for
different RGB variants, and even between packed-YUV and RGB
variants. This is intentional because it improves readability.
---
 libswscale/swscale.c | 325 +++++++++++++++++++++++++++++++------------
 1 file changed, 236 insertions(+), 89 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index a09dba037e..07f2145062 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -408,6 +408,13 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }
 }
 
+#define output_pixel(pos, val) \
+        if (target == PIX_FMT_GRAY16BE) { \
+            AV_WB16(pos, val); \
+        } else { \
+            AV_WL16(pos, val); \
+        }
+
 static av_always_inline void
 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
                         const int16_t **lumSrc, int lumFilterSize,
@@ -418,12 +425,6 @@ yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
 {
     int i;
 
-#define output_pixel(pos, val) \
-        if (target == PIX_FMT_GRAY16BE) { \
-            AV_WB16(pos, val); \
-        } else { \
-            AV_WL16(pos, val); \
-        }
     for (i = 0; i < (dstW >> 1); i++) {
         int j;
         int Y1 = 1 << 18;
@@ -485,10 +486,11 @@ yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
         output_pixel(&dest[2 * i2 + 0], Y1);
         output_pixel(&dest[2 * i2 + 2], Y2);
     }
-#undef output_pixel
 }
 
-#define YUV2PACKEDWRAPPER(name, ext, fmt) \
+#undef output_pixel
+
+#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                         const int16_t **lumSrc, int lumFilterSize, \
                         const int16_t *chrFilter, const int16_t **chrUSrc, \
@@ -496,7 +498,7 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                         const int16_t **alpSrc, uint8_t *dest, int dstW, \
                         int y) \
 { \
-    name ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
+    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                           alpSrc, dest, dstW, y, fmt); \
 } \
@@ -508,7 +510,7 @@ static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
                         const uint16_t *abuf1, uint8_t *dest, int dstW, \
                         int yalpha, int uvalpha, int y) \
 { \
-    name ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
+    name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
                           vbuf0, vbuf1, abuf0, abuf1, \
                           dest, dstW, yalpha, uvalpha, y, fmt); \
 } \
@@ -520,13 +522,20 @@ static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
                         int uvalpha, enum PixelFormat dstFormat, \
                         int flags, int y) \
 { \
-    name ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
+    name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
                           vbuf1, abuf0, dest, dstW, uvalpha, \
                           dstFormat, flags, y, fmt); \
 }
 
-YUV2PACKEDWRAPPER(yuv2gray16, LE, PIX_FMT_GRAY16LE);
-YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
+YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
+YUV2PACKEDWRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
+
+#define output_pixel(pos, acc) \
+    if (target == PIX_FMT_MONOBLACK) { \
+        pos = acc; \
+    } else { \
+        pos = ~acc; \
+    }
 
 static av_always_inline void
 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
@@ -541,12 +550,6 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
     int i;
     int acc = 0;
 
-#define output_pixel(pos, acc) \
-    if (target == PIX_FMT_MONOBLACK) { \
-        pos = acc; \
-    } else { \
-        pos = ~acc; \
-    }
     for (i = 0; i < dstW - 1; i += 2) {
         int j;
         int Y1 = 1 << 18;
@@ -620,21 +623,12 @@ yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
         acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
         output_pixel(*dest++, acc);
     }
-#undef output_pixel
 }
 
-YUV2PACKEDWRAPPER(yuv2mono, white, PIX_FMT_MONOWHITE);
-YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK);
+#undef output_pixel
 
-static av_always_inline void
-yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
-                     const int16_t **lumSrc, int lumFilterSize,
-                     const int16_t *chrFilter, const int16_t **chrUSrc,
-                     const int16_t **chrVSrc, int chrFilterSize,
-                     const int16_t **alpSrc, uint8_t *dest, int dstW,
-                     int y, enum PixelFormat target)
-{
-    int i;
+YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
+YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
 
 #define output_pixels(pos, Y1, U, Y2, V) \
     if (target == PIX_FMT_YUYV422) { \
@@ -649,6 +643,16 @@ yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
         dest[pos + 3] = Y2; \
     }
 
+static av_always_inline void
+yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                     const int16_t **lumSrc, int lumFilterSize,
+                     const int16_t *chrFilter, const int16_t **chrUSrc,
+                     const int16_t **chrVSrc, int chrFilterSize,
+                     const int16_t **alpSrc, uint8_t *dest, int dstW,
+                     int y, enum PixelFormat target)
+{
+    int i;
+
     for (i = 0; i < (dstW >> 1); i++) {
         int j;
         int Y1 = 1 << 18;
@@ -730,11 +734,157 @@ yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0,
             output_pixels(i * 4, Y1, U, Y2, V);
         }
     }
-#undef output_pixels
 }
 
-YUV2PACKEDWRAPPER(yuv2422, yuyv, PIX_FMT_YUYV422);
-YUV2PACKEDWRAPPER(yuv2422, uyvy, PIX_FMT_UYVY422);
+#undef output_pixels
+
+YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
+YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
+
+#define r_b ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? r : b)
+#define b_r ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? b : r)
+
+static av_always_inline void
+yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                       const int16_t **lumSrc, int lumFilterSize,
+                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                       const int16_t **chrVSrc, int chrFilterSize,
+                       const int16_t **alpSrc, uint8_t *dest, int dstW,
+                       int y, enum PixelFormat target)
+{
+    int i;
+
+    for (i = 0; i < (dstW >> 1); i++) {
+        int j;
+        int Y1 = 1 << 18;
+        int Y2 = 1 << 18;
+        int U  = 1 << 18;
+        int V  = 1 << 18;
+        const uint8_t *r, *g, *b;
+
+        for (j = 0; j < lumFilterSize; j++) {
+            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
+            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
+        }
+        for (j = 0; j < chrFilterSize; j++) {
+            U += chrUSrc[j][i] * chrFilter[j];
+            V += chrVSrc[j][i] * chrFilter[j];
+        }
+        Y1 >>= 19;
+        Y2 >>= 19;
+        U  >>= 19;
+        V  >>= 19;
+        if ((Y1 | Y2 | U | V) & 0x100) {
+            Y1 = av_clip_uint8(Y1);
+            Y2 = av_clip_uint8(Y2);
+            U  = av_clip_uint8(U);
+            V  = av_clip_uint8(V);
+        }
+
+        /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
+        r = (const uint8_t *) c->table_rV[V];
+        g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]);
+        b = (const uint8_t *) c->table_bU[U];
+
+        dest[ 0] = dest[ 1] = r_b[Y1];
+        dest[ 2] = dest[ 3] =   g[Y1];
+        dest[ 4] = dest[ 5] = b_r[Y1];
+        dest[ 6] = dest[ 7] = r_b[Y2];
+        dest[ 8] = dest[ 9] =   g[Y2];
+        dest[10] = dest[11] = b_r[Y2];
+        dest += 12;
+    }
+}
+
+static av_always_inline void
+yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0,
+                       const uint16_t *buf1, const uint16_t *ubuf0,
+                       const uint16_t *ubuf1, const uint16_t *vbuf0,
+                       const uint16_t *vbuf1, const uint16_t *abuf0,
+                       const uint16_t *abuf1, uint8_t *dest, int dstW,
+                       int yalpha, int uvalpha, int y,
+                       enum PixelFormat target)
+{
+    int  yalpha1 = 4095 - yalpha;
+    int uvalpha1 = 4095 - uvalpha;
+    int i;
+
+    for (i = 0; i < (dstW >> 1); i++) {
+        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
+        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
+        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
+        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
+        const uint8_t *r = (const uint8_t *) c->table_rV[V],
+                      *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
+                      *b = (const uint8_t *) c->table_bU[U];
+
+        dest[ 0] = dest[ 1] = r_b[Y1];
+        dest[ 2] = dest[ 3] =   g[Y1];
+        dest[ 4] = dest[ 5] = b_r[Y1];
+        dest[ 6] = dest[ 7] = r_b[Y2];
+        dest[ 8] = dest[ 9] =   g[Y2];
+        dest[10] = dest[11] = b_r[Y2];
+        dest += 12;
+    }
+}
+
+static av_always_inline void
+yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0,
+                       const uint16_t *ubuf0, const uint16_t *ubuf1,
+                       const uint16_t *vbuf0, const uint16_t *vbuf1,
+                       const uint16_t *abuf0, uint8_t *dest, int dstW,
+                       int uvalpha, enum PixelFormat dstFormat,
+                       int flags, int y, enum PixelFormat target)
+{
+    int i;
+
+    if (uvalpha < 2048) {
+        for (i = 0; i < (dstW >> 1); i++) {
+            int Y1 = buf0[i * 2]     >> 7;
+            int Y2 = buf0[i * 2 + 1] >> 7;
+            int U  = ubuf1[i]        >> 7;
+            int V  = vbuf1[i]        >> 7;
+            const uint8_t *r = (const uint8_t *) c->table_rV[V],
+                          *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
+                          *b = (const uint8_t *) c->table_bU[U];
+
+            dest[ 0] = dest[ 1] = r_b[Y1];
+            dest[ 2] = dest[ 3] =   g[Y1];
+            dest[ 4] = dest[ 5] = b_r[Y1];
+            dest[ 6] = dest[ 7] = r_b[Y2];
+            dest[ 8] = dest[ 9] =   g[Y2];
+            dest[10] = dest[11] = b_r[Y2];
+            dest += 12;
+        }
+    } else {
+        for (i = 0; i < (dstW >> 1); i++) {
+            int Y1 =  buf0[i * 2]          >> 7;
+            int Y2 =  buf0[i * 2 + 1]      >> 7;
+            int U  = (ubuf0[i] + ubuf1[i]) >> 8;
+            int V  = (vbuf0[i] + vbuf1[i]) >> 8;
+            /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
+            const uint8_t *r = (const uint8_t *) c->table_rV[V],
+                          *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
+                          *b = (const uint8_t *) c->table_bU[U];
+
+            dest[ 0] = dest[ 1] = r_b[Y1];
+            dest[ 2] = dest[ 3] =   g[Y1];
+            dest[ 4] = dest[ 5] = b_r[Y1];
+            dest[ 6] = dest[ 7] = r_b[Y2];
+            dest[ 8] = dest[ 9] =   g[Y2];
+            dest[10] = dest[11] = b_r[Y2];
+            dest += 12;
+        }
+    }
+}
+
+#undef r_b
+#undef b_r
+
+YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
+//YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
+YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
+//YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
 
 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) {\
@@ -876,36 +1026,6 @@ YUV2PACKEDWRAPPER(yuv2422, uyvy, PIX_FMT_UYVY422);
 
 #define YSCALE_YUV_2_ANYRGB_C(func)\
     switch(c->dstFormat) {\
-    case PIX_FMT_RGB48BE:\
-    case PIX_FMT_RGB48LE:\
-        func(uint8_t,0)\
-            ((uint8_t*)dest)[ 0]= r[Y1];\
-            ((uint8_t*)dest)[ 1]= r[Y1];\
-            ((uint8_t*)dest)[ 2]= g[Y1];\
-            ((uint8_t*)dest)[ 3]= g[Y1];\
-            ((uint8_t*)dest)[ 4]= b[Y1];\
-            ((uint8_t*)dest)[ 5]= b[Y1];\
-            ((uint8_t*)dest)[ 6]= r[Y2];\
-            ((uint8_t*)dest)[ 7]= r[Y2];\
-            ((uint8_t*)dest)[ 8]= g[Y2];\
-            ((uint8_t*)dest)[ 9]= g[Y2];\
-            ((uint8_t*)dest)[10]= b[Y2];\
-            ((uint8_t*)dest)[11]= b[Y2];\
-            dest+=12;\
-        }\
-        break;\
-    case PIX_FMT_BGR48BE:\
-    case PIX_FMT_BGR48LE:\
-        func(uint8_t,0)\
-            ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\
-            ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\
-            ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\
-            ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\
-            ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\
-            ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\
-            dest+=12;\
-        }\
-        break;\
     case PIX_FMT_RGBA:\
     case PIX_FMT_BGRA:\
         if (CONFIG_SMALL) {\
@@ -1196,19 +1316,21 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride,
     }
 }
 
+#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
+
+#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
+#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
+
 static av_always_inline void
 rgb48ToY_c_template(uint8_t *dst, const uint8_t *src, int width,
                     enum PixelFormat origin)
 {
     int i;
     for (i = 0; i < width; i++) {
-#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
-        int a = input_pixel(&src[i*6+0]) >> 8;
-        int g = input_pixel(&src[i*6+2]) >> 8;
-        int c = input_pixel(&src[i*6+4]) >> 8;
+        int r_b = input_pixel(&src[i*6+0]) >> 8;
+        int   g = input_pixel(&src[i*6+2]) >> 8;
+        int b_r = input_pixel(&src[i*6+4]) >> 8;
 
-#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? c : a)
-#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? a : c)
         dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
     }
 }
@@ -1221,9 +1343,9 @@ rgb48ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
     int i;
     assert(src1==src2);
     for (i = 0; i < width; i++) {
-        int a = input_pixel(&src1[6*i + 0]) >> 8;
-        int g = input_pixel(&src1[6*i + 2]) >> 8;
-        int c = input_pixel(&src1[6*i + 4]) >> 8;
+        int r_b = input_pixel(&src1[i*6+0]) >> 8;
+        int   g = input_pixel(&src1[i*6+2]) >> 8;
+        int b_r = input_pixel(&src1[i*6+4]) >> 8;
 
         dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
         dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
@@ -1238,17 +1360,18 @@ rgb48ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
     int i;
     assert(src1==src2);
     for (i = 0; i < width; i++) {
-        int a = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8);
-        int g = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8);
-        int c = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8);
+        int r_b = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8);
+        int   g = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8);
+        int b_r = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8);
 
         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
     }
+}
+
 #undef r
 #undef b
 #undef input_pixel
-}
 
 #define rgb48funcs(pattern, BE_LE, origin) \
 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
@@ -1276,6 +1399,10 @@ rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
 
+#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
+                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
+                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
+
 static av_always_inline void
 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
                        int width, enum PixelFormat origin,
@@ -1288,9 +1415,6 @@ rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
     int i;
 
     for (i = 0; i < width; i++) {
-#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
-                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
-                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
         int px = input_pixel(i) >> shp;
         int b = (px & maskb) >> shb;
         int g = (px & maskg) >> shg;
@@ -1356,9 +1480,10 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
         dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
         dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
     }
-#undef input_pixel
 }
 
+#undef input_pixel
+
 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                          maskg, maskb, rsh, gsh, bsh, S) \
 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
@@ -1545,6 +1670,8 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
     nvXXtoUV_c(dstV, dstU, src1, width);
 }
 
+#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
+
 // FIXME Maybe dither instead.
 static av_always_inline void
 yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
@@ -1555,7 +1682,6 @@ yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
     const uint16_t *srcU = (const uint16_t *) _srcU;
     const uint16_t *srcV = (const uint16_t *) _srcV;
 
-#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
     for (i = 0; i < width; i++) {
         dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
         dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
@@ -1571,9 +1697,10 @@ yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
 
     for (i = 0; i < width; i++)
         dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
-#undef input_pixel
 }
 
+#undef input_pixel
+
 #define YUV_NBPS(depth, BE_LE, origin) \
 static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                      const uint8_t *srcU, const uint8_t *srcV, \
@@ -1855,14 +1982,34 @@ find_c_packed_planar_out_funcs(SwsContext *c,
             *yuv2packedX = yuv2monoblack_X_c;
             break;
         case PIX_FMT_YUYV422:
-            *yuv2packed1 = yuv2422yuyv_1_c;
-            *yuv2packed2 = yuv2422yuyv_2_c;
-            *yuv2packedX = yuv2422yuyv_X_c;
+            *yuv2packed1 = yuv2yuyv422_1_c;
+            *yuv2packed2 = yuv2yuyv422_2_c;
+            *yuv2packedX = yuv2yuyv422_X_c;
             break;
         case PIX_FMT_UYVY422:
-            *yuv2packed1 = yuv2422uyvy_1_c;
-            *yuv2packed2 = yuv2422uyvy_2_c;
-            *yuv2packedX = yuv2422uyvy_X_c;
+            *yuv2packed1 = yuv2uyvy422_1_c;
+            *yuv2packed2 = yuv2uyvy422_2_c;
+            *yuv2packedX = yuv2uyvy422_X_c;
+            break;
+        case PIX_FMT_RGB48LE:
+            //*yuv2packed1 = yuv2rgb48le_1_c;
+            //*yuv2packed2 = yuv2rgb48le_2_c;
+            //*yuv2packedX = yuv2rgb48le_X_c;
+            //break;
+        case PIX_FMT_RGB48BE:
+            *yuv2packed1 = yuv2rgb48be_1_c;
+            *yuv2packed2 = yuv2rgb48be_2_c;
+            *yuv2packedX = yuv2rgb48be_X_c;
+            break;
+        case PIX_FMT_BGR48LE:
+            //*yuv2packed1 = yuv2bgr48le_1_c;
+            //*yuv2packed2 = yuv2bgr48le_2_c;
+            //*yuv2packedX = yuv2bgr48le_X_c;
+            //break;
+        case PIX_FMT_BGR48BE:
+            *yuv2packed1 = yuv2bgr48be_1_c;
+            *yuv2packed2 = yuv2bgr48be_2_c;
+            *yuv2packedX = yuv2bgr48be_X_c;
             break;
         default:
             *yuv2packed1 = yuv2packed1_c;

From 0be3736796b1651ad785c9ba68d26c2276cc074c Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 14 Jun 2011 15:35:05 -0400
Subject: [PATCH 810/830] ffmpeg: fix streaming to ffserver.

---
 ffmpeg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ffmpeg.c b/ffmpeg.c
index cbcdba8c64..04672cc831 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -684,6 +684,7 @@ static int read_ffserver_streams(AVFormatContext *s, const char *filename)
         // FIXME: a more elegant solution is needed
         st = av_mallocz(sizeof(AVStream));
         memcpy(st, ic->streams[i], sizeof(AVStream));
+        st->info = NULL;
         st->codec = avcodec_alloc_context();
         if (!st->codec) {
             print_error(filename, AVERROR(ENOMEM));

From 4e0583020567dd2062a908fe59aacba484e68049 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 14 Jun 2011 16:55:24 -0400
Subject: [PATCH 811/830] swscale: remove misplaced comment.

The comment should have been placed only in
yuv2rgb48_X_c_template, not yuv2rgb48_1_c_template.
---
 libswscale/swscale.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 07f2145062..f729c117a7 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -862,7 +862,6 @@ yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0,
             int Y2 =  buf0[i * 2 + 1]      >> 7;
             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
-            /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
             const uint8_t *r = (const uint8_t *) c->table_rV[V],
                           *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
                           *b = (const uint8_t *) c->table_bU[U];

From b19313218c32fa9446d474cbddb11c0776868cd3 Mon Sep 17 00:00:00 2001
From: William Yu <genwillyu@gmail.com>
Date: Tue, 14 Jun 2011 17:16:51 +0200
Subject: [PATCH 812/830] mjpegenc: Fix JFIF version

---
 libavcodec/mjpegenc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/mjpegenc.c b/libavcodec/mjpegenc.c
index db477a0ab6..b721ab3580 100644
--- a/libavcodec/mjpegenc.c
+++ b/libavcodec/mjpegenc.c
@@ -162,7 +162,7 @@ static void jpeg_put_comments(MpegEncContext *s)
     put_marker(p, APP0);
     put_bits(p, 16, 16);
     ff_put_string(p, "JFIF", 1); /* this puts the trailing zero-byte too */
-    put_bits(p, 16, 0x0201); /* v 1.02 */
+    put_bits(p, 16, 0x0102); /* v 1.02 */
     put_bits(p, 8, 0); /* units type: 0 - aspect ratio */
     put_bits(p, 16, s->avctx->sample_aspect_ratio.num);
     put_bits(p, 16, s->avctx->sample_aspect_ratio.den);

From 97e057ff814c253c770f011736e33c0b65c9c663 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 15 Jun 2011 00:52:43 +0200
Subject: [PATCH 813/830] swscale: Fix compilation with --disable-mmx2.

Some MMX2 functions were being referenced without proper #ifdefs.
---
 libswscale/x86/swscale_mmx.c | 2 ++
 libswscale/x86/yuv2rgb_mmx.c | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c
index c86f75df51..f855a75212 100644
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -182,6 +182,8 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
 
     if (cpu_flags & AV_CPU_FLAG_MMX)
         sws_init_swScale_MMX(c);
+#if HAVE_MMX2
     if (cpu_flags & AV_CPU_FLAG_MMX2)
         sws_init_swScale_MMX2(c);
+#endif
 }
diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c
index 439482bb71..50f475a1ba 100644
--- a/libswscale/x86/yuv2rgb_mmx.c
+++ b/libswscale/x86/yuv2rgb_mmx.c
@@ -72,12 +72,14 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
         c->srcFormat != PIX_FMT_YUVA420P)
         return NULL;
 
-    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
+#if HAVE_MMX2
+    if (cpu_flags & AV_CPU_FLAG_MMX2) {
         switch (c->dstFormat) {
         case PIX_FMT_RGB24:  return yuv420_rgb24_MMX2;
         case PIX_FMT_BGR24:  return yuv420_bgr24_MMX2;
         }
     }
+#endif
 
     if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
         switch (c->dstFormat) {

From a60466dbc3aededb0a1fab96d7fe2286f4c1a8f7 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 15 Jun 2011 00:56:31 +0200
Subject: [PATCH 814/830] swscale: Remove HAVE_MMX from files that are only
 compiled with MMX enabled.

---
 libswscale/x86/rgb2rgb.c     | 2 +-
 libswscale/x86/yuv2rgb_mmx.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index 97c50dd636..282618c301 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -127,7 +127,7 @@ void rgb2rgb_init_x86(void)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (HAVE_MMX      && cpu_flags & AV_CPU_FLAG_MMX)
+    if (cpu_flags & AV_CPU_FLAG_MMX)
         rgb2rgb_init_MMX();
     if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)
         rgb2rgb_init_3DNOW();
diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c
index 50f475a1ba..0eaea77485 100644
--- a/libswscale/x86/yuv2rgb_mmx.c
+++ b/libswscale/x86/yuv2rgb_mmx.c
@@ -81,7 +81,7 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
     }
 #endif
 
-    if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
+    if (cpu_flags & AV_CPU_FLAG_MMX) {
         switch (c->dstFormat) {
             case PIX_FMT_RGB32:
                 if (c->srcFormat == PIX_FMT_YUVA420P) {

From 9e2f448d68d9df7ad79d968db315c6b0cc79c4df Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 15 Jun 2011 01:36:12 +0200
Subject: [PATCH 815/830] vf_mp: Fix large memleak.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavfilter/vf_mp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/vf_mp.c b/libavfilter/vf_mp.c
index 01a8f064a2..0642b44f00 100644
--- a/libavfilter/vf_mp.c
+++ b/libavfilter/vf_mp.c
@@ -882,7 +882,7 @@ static void end_frame(AVFilterLink *inlink)
     }
     free_mp_image(mpi);
 
-//    avfilter_unref_buffer(inpic);
+    avfilter_unref_buffer(inpic);
 }
 
 AVFilter avfilter_vf_mp = {

From 19b8c988286cbc827d6c0d2569ee88af323262f1 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Wed, 15 Jun 2011 10:47:46 +0200
Subject: [PATCH 816/830] Fix compilation with --disable-everything
 --enable-encoder=ac3/ac3_fixed.

---
 libavcodec/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index f4613749a0..b6103af3c0 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -62,9 +62,9 @@ OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
                                           mpeg4audio.o kbdwin.o
 OBJS-$(CONFIG_AASC_DECODER)            += aasc.o msrledec.o
 OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3dec_data.o ac3.o kbdwin.o
-OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_combined.o ac3enc_fixed.o ac3enc_float.o ac3tab.o ac3.o kbdwin.o
+OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_combined.o ac3enc_fixed.o ac3enc_float.o ac3tab.o ac3.o kbdwin.o ac3enc.o
 OBJS-$(CONFIG_AC3_FLOAT_ENCODER)       += ac3enc_float.o ac3tab.o ac3tab.o ac3.o kbdwin.o
-OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3tab.o ac3tab.o ac3.o
+OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3tab.o ac3tab.o ac3.o ac3enc.o
 OBJS-$(CONFIG_ALAC_DECODER)            += alac.o
 OBJS-$(CONFIG_ALAC_ENCODER)            += alacenc.o
 OBJS-$(CONFIG_ALS_DECODER)             += alsdec.o bgmc.o mpeg4audio.o

From b1c450be49d9896ce4c96d04944f5f15e2ced028 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Wed, 15 Jun 2011 10:53:18 +0200
Subject: [PATCH 817/830] Fix "redundant redeclaration" warning.

---
 libavcodec/ac3enc.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 563dcd532e..2d04f26303 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -243,8 +243,6 @@ typedef struct AC3EncodeContext {
 } AC3EncodeContext;
 
 
-extern const int64_t ff_ac3_channel_layouts[19];
-
 int ff_ac3_encode_init(AVCodecContext *avctx);
 
 int ff_ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,

From b3452771c4550b46ea54d15be6cfe8cc10585199 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Wed, 15 Jun 2011 10:57:26 +0200
Subject: [PATCH 818/830] Fix multi-channel AAC encoding.

Fixes ticket 55.
---
 libavcodec/aacenc.c  | 20 +++++++++++++++++++-
 libavcodec/libfaac.c | 10 ++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 66af2b1915..16c59061dc 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -135,6 +135,15 @@ static const uint8_t aac_chan_configs[6][5] = {
  {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
 };
 
+static const uint8_t channel_maps[][AAC_MAX_CHANNELS] = {
+    { 0 },
+    { 0, 1 },
+    { 2, 0, 1 },
+    { 2, 0, 1, 3 },
+    { 2, 0, 1, 3, 4 },
+    { 2, 0, 1, 4, 5, 3 },
+};
+
 /**
  * Make AAC audio config object.
  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
@@ -499,15 +508,24 @@ static int aac_encode_frame(AVCodecContext *avctx,
         return 0;
     if (data) {
         if (!s->psypp) {
+            if (avctx->channels <= 2) {
             memcpy(s->samples + 1024 * avctx->channels, data,
                    1024 * avctx->channels * sizeof(s->samples[0]));
+            } else {
+                for (i = 0; i < 1024; i++)
+                    for (ch = 0; ch < avctx->channels; ch++)
+                        s->samples[(i + 1024) * avctx->channels + ch] =
+                            ((int16_t*)data)[i * avctx->channels +
+                                             channel_maps[avctx->channels-1][ch]];
+            }
         } else {
             start_ch = 0;
             samples2 = s->samples + 1024 * avctx->channels;
             for (i = 0; i < chan_map[0]; i++) {
                 tag = chan_map[i+1];
                 chans = tag == TYPE_CPE ? 2 : 1;
-                ff_psy_preprocess(s->psypp, (uint16_t*)data + start_ch,
+                ff_psy_preprocess(s->psypp,
+                                  (uint16_t*)data + channel_maps[avctx->channels-1][start_ch],
                                   samples2 + start_ch, start_ch, chans);
                 start_ch += chans;
             }
diff --git a/libavcodec/libfaac.c b/libavcodec/libfaac.c
index af85587272..2acc682581 100644
--- a/libavcodec/libfaac.c
+++ b/libavcodec/libfaac.c
@@ -31,6 +31,13 @@ typedef struct FaacAudioContext {
     faacEncHandle faac_handle;
 } FaacAudioContext;
 
+static const int channel_maps[][6] = {
+    { 2, 0, 1 },          //< C L R
+    { 2, 0, 1, 3 },       //< C L R Cs
+    { 2, 0, 1, 3, 4 },    //< C L R Ls Rs
+    { 2, 0, 1, 4, 5, 3 }, //< C L R Ls Rs LFE
+};
+
 static av_cold int Faac_encode_init(AVCodecContext *avctx)
 {
     FaacAudioContext *s = avctx->priv_data;
@@ -86,6 +93,9 @@ static av_cold int Faac_encode_init(AVCodecContext *avctx)
     }
     faac_cfg->outputFormat = 1;
     faac_cfg->inputFormat = FAAC_INPUT_16BIT;
+    if (avctx->channels > 2)
+        memcpy(faac_cfg->channel_map, channel_maps[avctx->channels-3],
+               avctx->channels * sizeof(int));
 
     avctx->frame_size = samples_input / avctx->channels;
 

From dbf23d191a808734679bd7d0c8737c082ad24a72 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Wed, 15 Jun 2011 10:58:25 +0200
Subject: [PATCH 819/830] Reindent after last commit.

---
 libavcodec/aacenc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 16c59061dc..0de6622389 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -509,8 +509,8 @@ static int aac_encode_frame(AVCodecContext *avctx,
     if (data) {
         if (!s->psypp) {
             if (avctx->channels <= 2) {
-            memcpy(s->samples + 1024 * avctx->channels, data,
-                   1024 * avctx->channels * sizeof(s->samples[0]));
+                memcpy(s->samples + 1024 * avctx->channels, data,
+                       1024 * avctx->channels * sizeof(s->samples[0]));
             } else {
                 for (i = 0; i < 1024; i++)
                     for (ch = 0; ch < avctx->channels; ch++)

From 4f4d1358ae30c2b8588f4dc6b3ef964a048004e8 Mon Sep 17 00:00:00 2001
From: Andrew Wason <rectalogic@rectalogic.com>
Date: Wed, 15 Jun 2011 11:09:17 +0200
Subject: [PATCH 820/830] Support reading chan atoms with empty channel
 descriptions.

Fixes ticket 263.
---
 libavformat/isom.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libavformat/isom.c b/libavformat/isom.c
index 45ccdd2864..dcc3b74af3 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c
@@ -459,16 +459,18 @@ void ff_mov_read_chan(AVFormatContext *s, int64_t size, AVCodecContext *codec)
     uint32_t layout_tag;
     AVIOContext *pb = s->pb;
     const MovChannelLayout *layouts = mov_channel_layout;
-    if (size != 12) {
+    layout_tag = avio_rb32(pb);
+    size -= 4;
+    if (layout_tag == 0) { //< kCAFChannelLayoutTag_UseChannelDescriptions
         // Channel descriptions not implemented
         av_log_ask_for_sample(s, "Unimplemented container channel layout.\n");
         avio_skip(pb, size);
         return;
     }
-    layout_tag = avio_rb32(pb);
     if (layout_tag == 0x10000) { //< kCAFChannelLayoutTag_UseChannelBitmap
         codec->channel_layout = avio_rb32(pb);
-        avio_skip(pb, 4);
+        size -= 4;
+        avio_skip(pb, size);
         return;
     }
     while (layouts->channel_layout) {
@@ -480,7 +482,7 @@ void ff_mov_read_chan(AVFormatContext *s, int64_t size, AVCodecContext *codec)
     }
     if (!codec->channel_layout)
         av_log(s, AV_LOG_WARNING, "Unknown container channel layout.\n");
-    avio_skip(pb, 8);
+    avio_skip(pb, size);
 }
 
 void ff_mov_write_chan(AVIOContext *pb, int64_t channel_layout)

From 3636e791ec295dcea3c1ce0206d944cd5c76a650 Mon Sep 17 00:00:00 2001
From: Janne Grunau <janne-libav@jannau.net>
Date: Tue, 26 Apr 2011 00:27:48 +0200
Subject: [PATCH 821/830] swscale: use SwsContext for av_log when available

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libswscale/utils.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libswscale/utils.c b/libswscale/utils.c
index 213bf3a043..d048b22e24 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -787,11 +787,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     unscaled = (srcW == dstW && srcH == dstH);
 
     if (!isSupportedIn(srcFormat)) {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
+        av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", sws_format_name(srcFormat));
         return AVERROR(EINVAL);
     }
     if (!isSupportedOut(dstFormat)) {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
+        av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", sws_format_name(dstFormat));
         return AVERROR(EINVAL);
     }
 
@@ -807,12 +807,12 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
                 |SWS_SPLINE
                 |SWS_BICUBLIN);
     if(!i || (i & (i-1))) {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
+        av_log(c, AV_LOG_ERROR, "Exactly one scaler algorithm must be chosen\n");
         return AVERROR(EINVAL);
     }
     /* sanity check */
     if (srcW<4 || srcH<1 || dstW<8 || dstH<1) { //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
-        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
+        av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n",
                srcW, srcH, dstW, dstH);
         return AVERROR(EINVAL);
     }

From 88ff180ad66d5b12f5ee0ffbda891b467725a8d3 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 15 Jun 2011 12:58:00 +0100
Subject: [PATCH 822/830] ARM: update ff_h264_idct_add8_neon for 4:4:4 changes

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/arm/h264dsp_init_arm.c |  3 +--
 libavcodec/arm/h264idct_neon.S    | 41 ++++++++++++++++++-------------
 2 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index 483b26ab02..c2399e50ff 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -122,8 +122,7 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
     c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
     c->h264_idct_add16      = ff_h264_idct_add16_neon;
     c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
-    //FIXME: reenable when asm is updated.
-    //c->h264_idct_add8       = ff_h264_idct_add8_neon;
+    c->h264_idct_add8       = ff_h264_idct_add8_neon;
     c->h264_idct8_add       = ff_h264_idct8_add_neon;
     c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
     c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
index b7253542a8..3c743e1607 100644
--- a/libavcodec/arm/h264idct_neon.S
+++ b/libavcodec/arm/h264idct_neon.S
@@ -148,24 +148,27 @@ function ff_h264_idct_add8_neon, export=1
         add             r5,  r1,  #16*4
         add             r1,  r2,  #16*32
         mov             r2,  r3
+        mov             r3,  r1
         ldr             r6,  [sp, #32]
         movrel          r7,  scan8+16
-        mov             ip,  #7
-1:      ldrb            r8,  [r7], #1
-        ldr             r0,  [r5], #4
+        mov             r12, #0
+1:      ldrb            r8,  [r7, r12]
+        ldr             r0,  [r5, r12, lsl #2]
         ldrb            r8,  [r6, r8]
-        tst             ip,  #4
-        addne           r0,  r0,  r4
-        addeq           r0,  r0,  r9
+        add             r0,  r0,  r4
+        add             r1,  r3,  r12, lsl #5
         cmp             r8,  #0
         ldrsh           r8,  [r1]
         adrne           lr,  ff_h264_idct_add_neon
         adreq           lr,  ff_h264_idct_dc_add_neon
         cmpeq           r8,  #0
         blxne           lr
-        subs            ip,  ip,  #1
-        add             r1,  r1,  #32
-        bge             1b
+        add             r12, r12, #1
+        cmp             r12, #4
+        moveq           r12, #16
+        moveq           r4,  r9
+        cmp             r12, #20
+        blt             1b
         pop             {r4-r10,pc}
 endfunc
 
@@ -374,11 +377,15 @@ function ff_h264_idct8_add4_neon, export=1
 endfunc
 
         .section .rodata
-scan8:  .byte           4+1*8, 5+1*8, 4+2*8, 5+2*8
-        .byte           6+1*8, 7+1*8, 6+2*8, 7+2*8
-        .byte           4+3*8, 5+3*8, 4+4*8, 5+4*8
-        .byte           6+3*8, 7+3*8, 6+4*8, 7+4*8
-        .byte           1+1*8, 2+1*8
-        .byte           1+2*8, 2+2*8
-        .byte           1+4*8, 2+4*8
-        .byte           1+5*8, 2+5*8
+scan8:  .byte           4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
+        .byte           6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
+        .byte           4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
+        .byte           6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
+        .byte           4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
+        .byte           6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
+        .byte           4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
+        .byte           6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
+        .byte           4+11*8, 5+11*8, 4+12*8, 5+12*8
+        .byte           6+11*8, 7+11*8, 6+12*8, 7+12*8
+        .byte           4+13*8, 5+13*8, 4+14*8, 5+14*8
+        .byte           6+13*8, 7+13*8, 6+14*8, 7+14*8

From e7c7b2d87842c689ae7da4d6d636a08cb1e9ce88 Mon Sep 17 00:00:00 2001
From: Luca Abeni <lucabe72@email.it>
Date: Wed, 15 Jun 2011 17:59:03 +0200
Subject: [PATCH 823/830] Full support for sending H.264 in RTP

This implements support for the "MP4" syntax of H.264 bitstreams.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/rtpenc_h264.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/libavformat/rtpenc_h264.c b/libavformat/rtpenc_h264.c
index 697def61c2..be9cb2c774 100644
--- a/libavformat/rtpenc_h264.c
+++ b/libavformat/rtpenc_h264.c
@@ -29,6 +29,24 @@
 #include "avc.h"
 #include "rtpenc.h"
 
+static const uint8_t *avc_mp4_find_startcode(const uint8_t *start, const uint8_t *end, int nal_lenght_size)
+{
+    int res = 0;
+
+    if (end - start < nal_lenght_size) {
+        return NULL;
+    }
+    while (nal_lenght_size--) {
+        res = (res << 8) | *start++;
+    }
+
+    if (res + start > end) {
+        return NULL;
+    }
+
+    return res + start;
+}
+
 static void nal_send(AVFormatContext *s1, const uint8_t *buf, int size, int last)
 {
     RTPMuxContext *s = s1->priv_data;
@@ -66,12 +84,20 @@ void ff_rtp_send_h264(AVFormatContext *s1, const uint8_t *buf1, int size)
     RTPMuxContext *s = s1->priv_data;
 
     s->timestamp = s->cur_timestamp;
-    r = ff_avc_find_startcode(buf1, buf1 + size);
+    r = s->nal_length_size ? (avc_mp4_find_startcode(buf1, buf1 + size, s->nal_length_size) ? buf1 : buf1 + size) : ff_avc_find_startcode(buf1, buf1 + size);
     while (r < buf1 + size) {
         const uint8_t *r1;
 
-        while(!*(r++));
-        r1 = ff_avc_find_startcode(r, buf1 + size);
+        if (s->nal_length_size) {
+            r1 = avc_mp4_find_startcode(r, buf1 + size, s->nal_length_size);
+            if (!r1) {
+                r1 = buf1 + size;
+            }
+            r += s->nal_length_size;
+        } else {
+            while(!*(r++));
+            r1 = ff_avc_find_startcode(r, buf1 + size);
+        }
         nal_send(s1, r, r1 - r, (r1 == buf1 + size));
         r = r1;
     }

From c940c31c88494c6accbdb0df3b715bd1f161368f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Wed, 15 Jun 2011 20:20:47 +0200
Subject: [PATCH 824/830] Fix spelling.

---
 libavformat/rtpenc_h264.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavformat/rtpenc_h264.c b/libavformat/rtpenc_h264.c
index be9cb2c774..0f8850ee77 100644
--- a/libavformat/rtpenc_h264.c
+++ b/libavformat/rtpenc_h264.c
@@ -29,14 +29,14 @@
 #include "avc.h"
 #include "rtpenc.h"
 
-static const uint8_t *avc_mp4_find_startcode(const uint8_t *start, const uint8_t *end, int nal_lenght_size)
+static const uint8_t *avc_mp4_find_startcode(const uint8_t *start, const uint8_t *end, int nal_length_size)
 {
     int res = 0;
 
-    if (end - start < nal_lenght_size) {
+    if (end - start < nal_length_size) {
         return NULL;
     }
-    while (nal_lenght_size--) {
+    while (nal_length_size--) {
         res = (res << 8) | *start++;
     }
 

From 7f2228dbfec0268b8b486573041043c0de1aa381 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Wed, 15 Jun 2011 20:21:33 +0200
Subject: [PATCH 825/830] Make buffer size check consistent and avoid a
 possible overflow.

---
 libavformat/rtpenc_h264.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/rtpenc_h264.c b/libavformat/rtpenc_h264.c
index 0f8850ee77..11074d0d51 100644
--- a/libavformat/rtpenc_h264.c
+++ b/libavformat/rtpenc_h264.c
@@ -40,7 +40,7 @@ static const uint8_t *avc_mp4_find_startcode(const uint8_t *start, const uint8_t
         res = (res << 8) | *start++;
     }
 
-    if (res + start > end) {
+    if (end - start < res) {
         return NULL;
     }
 

From d7ee44024c96ebdbcd718885a77e9a07779df54c Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Wed, 15 Jun 2011 08:00:03 +0200
Subject: [PATCH 826/830] ffmpeg: don't abuse a global for passing samplerate
 from input to output

It's broken with multiple files or audio streams.

This removes the default samplerate of 44100 for raw input, hence all
the FATE changes.
---
 ffmpeg.c                  | 24 +++++++++++++-----------
 tests/fate2.mak           |  2 +-
 tests/lavf-regression.sh  | 12 ++++++------
 tests/regression-funcs.sh |  2 +-
 4 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 04672cc831..1a00bdbb5b 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -163,7 +163,7 @@ static char *vfilters = NULL;
 #endif
 
 static int intra_only = 0;
-static int audio_sample_rate = 44100;
+static int audio_sample_rate = 0;
 static int64_t channel_layout = 0;
 #define QSCALE_NONE -99999
 static float audio_qscale = QSCALE_NONE;
@@ -2170,6 +2170,13 @@ static int transcode(AVFormatContext **output_files,
                 if(!ost->fifo)
                     goto fail;
                 ost->reformat_pair = MAKE_SFMT_PAIR(AV_SAMPLE_FMT_NONE,AV_SAMPLE_FMT_NONE);
+                if (!codec->sample_rate) {
+                    codec->sample_rate = icodec->sample_rate;
+                    if (icodec->lowres)
+                        codec->sample_rate >>= icodec->lowres;
+                }
+                choose_sample_rate(ost->st, codec->codec);
+                codec->time_base = (AVRational){1, codec->sample_rate};
                 ost->audio_resample = codec->sample_rate != icodec->sample_rate || audio_sync_method > 1;
                 icodec->request_channels = codec->channels;
                 ist->decoding_needed = 1;
@@ -3268,15 +3275,9 @@ static int opt_input_file(const char *opt, const char *filename)
             set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_AUDIO], AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM, input_codecs[nb_input_codecs-1]);
             channel_layout    = dec->channel_layout;
             audio_channels    = dec->channels;
-            audio_sample_rate = dec->sample_rate;
             audio_sample_fmt  = dec->sample_fmt;
             if(audio_disable)
                 st->discard= AVDISCARD_ALL;
-            /* Note that av_find_stream_info can add more streams, and we
-             * currently have no chance of setting up lowres decoding
-             * early enough for them. */
-            if (dec->lowres)
-                audio_sample_rate >>= dec->lowres;
             break;
         case AVMEDIA_TYPE_VIDEO:
             input_codecs[nb_input_codecs-1] = avcodec_find_decoder_by_name(video_codec_name);
@@ -3338,6 +3339,7 @@ static int opt_input_file(const char *opt, const char *filename)
     input_files[nb_input_files - 1].ist_index  = nb_input_streams - ic->nb_streams;
 
     video_channel = 0;
+    audio_sample_rate = 0;
 
     av_freep(&video_codec_name);
     av_freep(&audio_codec_name);
@@ -3585,7 +3587,6 @@ static void new_audio_stream(AVFormatContext *oc, int file_idx)
     if (audio_stream_copy) {
         st->stream_copy = 1;
         audio_enc->channels = audio_channels;
-        audio_enc->sample_rate = audio_sample_rate;
     } else {
         audio_enc->codec_id = codec_id;
         set_context_opts(audio_enc, avcodec_opts[AVMEDIA_TYPE_AUDIO], AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, codec);
@@ -3596,14 +3597,13 @@ static void new_audio_stream(AVFormatContext *oc, int file_idx)
         }
         audio_enc->channels = audio_channels;
         audio_enc->sample_fmt = audio_sample_fmt;
-        audio_enc->sample_rate = audio_sample_rate;
+        if (audio_sample_rate)
+            audio_enc->sample_rate = audio_sample_rate;
         audio_enc->channel_layout = channel_layout;
         if (av_get_channel_layout_nb_channels(channel_layout) != audio_channels)
             audio_enc->channel_layout = 0;
         choose_sample_fmt(st, codec);
-        choose_sample_rate(st, codec);
     }
-    audio_enc->time_base= (AVRational){1, audio_sample_rate};
     if (audio_language) {
         av_dict_set(&st->metadata, "language", audio_language, 0);
         av_freep(&audio_language);
@@ -3889,6 +3889,8 @@ static void opt_output_file(const char *filename)
 
     set_context_opts(oc, avformat_opts, AV_OPT_FLAG_ENCODING_PARAM, NULL);
 
+    audio_sample_rate = 0;
+
     av_freep(&forced_key_frames);
     uninit_opts();
     init_opts();
diff --git a/tests/fate2.mak b/tests/fate2.mak
index 6a9448faf1..066f9ef583 100644
--- a/tests/fate2.mak
+++ b/tests/fate2.mak
@@ -165,7 +165,7 @@ fate-wmapro-2ch: CMP = oneoff
 fate-wmapro-2ch: REF = $(SAMPLES)/wmapro/Beethovens_9th-1_small.pcm
 
 FATE_TESTS += fate-ansi
-fate-ansi: CMD = framecrc -i $(SAMPLES)/ansi/TRE-IOM5.ANS -pix_fmt rgb24
+fate-ansi: CMD = framecrc -ar 44100 -i $(SAMPLES)/ansi/TRE-IOM5.ANS -pix_fmt rgb24
 
 FATE_TESTS += fate-wmv8-drm
 # discard last packet to avoid fails due to overread of VC-1 decoder
diff --git a/tests/lavf-regression.sh b/tests/lavf-regression.sh
index 94d258334b..39e752b3c6 100755
--- a/tests/lavf-regression.sh
+++ b/tests/lavf-regression.sh
@@ -14,7 +14,7 @@ eval do_$test=y
 do_lavf()
 {
     file=${outfile}lavf.$1
-    do_ffmpeg $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $DEC_OPTS -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 $2
+    do_ffmpeg $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $DEC_OPTS -ar 44100 -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 $2
     do_ffmpeg_crc $file $DEC_OPTS -i $target_path/$file $3
 }
 
@@ -39,8 +39,8 @@ do_image_formats()
 do_audio_only()
 {
     file=${outfile}lavf.$1
-    do_ffmpeg $file $DEC_OPTS $2 -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 $3
-    do_ffmpeg_crc $file $DEC_OPTS -i $target_path/$file
+    do_ffmpeg $file $DEC_OPTS $2 -ar 44100 -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 $3
+    do_ffmpeg_crc $file $DEC_OPTS $4 -i $target_path/$file
 }
 
 rm -f "$logfile"
@@ -55,7 +55,7 @@ fi
 
 if [ -n "$do_rm" ] ; then
 file=${outfile}lavf.rm
-do_ffmpeg $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $DEC_OPTS -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 -acodec ac3_fixed
+do_ffmpeg $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $DEC_OPTS -ar 44100 -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 -acodec ac3_fixed
 # broken
 #do_ffmpeg_crc $file -i $target_path/$file
 fi
@@ -181,11 +181,11 @@ do_audio_only wav
 fi
 
 if [ -n "$do_alaw" ] ; then
-do_audio_only al
+do_audio_only al "" "" "-ar 44100"
 fi
 
 if [ -n "$do_mulaw" ] ; then
-do_audio_only ul
+do_audio_only ul "" "" "-ar 44100"
 fi
 
 if [ -n "$do_au" ] ; then
diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh
index 4cf2e20fd8..e57cdf111e 100755
--- a/tests/regression-funcs.sh
+++ b/tests/regression-funcs.sh
@@ -114,7 +114,7 @@ do_video_encoding()
 do_audio_encoding()
 {
     file=${outfile}$1
-    do_ffmpeg $file $DEC_OPTS -ac 2 -f s16le -i $pcm_src -ab 128k $ENC_OPTS $2
+    do_ffmpeg $file $DEC_OPTS -ac 2 -ar 44100 -f s16le -i $pcm_src -ab 128k $ENC_OPTS $2
 }
 
 do_audio_decoding()

From 8f3e999736b7bad956becb3705661f52d986eb2d Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Wed, 15 Jun 2011 08:00:03 +0200
Subject: [PATCH 827/830] ffmpeg: don't abuse a global for passing channels
 from input to output

It's broken with multiple files or audio streams.
---
 ffmpeg.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 1a00bdbb5b..3ed578902c 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -168,7 +168,7 @@ static int64_t channel_layout = 0;
 #define QSCALE_NONE -99999
 static float audio_qscale = QSCALE_NONE;
 static int audio_disable = 0;
-static int audio_channels = 1;
+static int audio_channels = 0;
 static char  *audio_codec_name = NULL;
 static unsigned int audio_codec_tag = 0;
 static char *audio_language = NULL;
@@ -2177,6 +2177,10 @@ static int transcode(AVFormatContext **output_files,
                 }
                 choose_sample_rate(ost->st, codec->codec);
                 codec->time_base = (AVRational){1, codec->sample_rate};
+                if (!codec->channels)
+                    codec->channels = icodec->channels;
+                if (av_get_channel_layout_nb_channels(codec->channel_layout) != codec->channels)
+                    codec->channel_layout = 0;
                 ost->audio_resample = codec->sample_rate != icodec->sample_rate || audio_sync_method > 1;
                 icodec->request_channels = codec->channels;
                 ist->decoding_needed = 1;
@@ -3274,7 +3278,6 @@ static int opt_input_file(const char *opt, const char *filename)
             input_codecs[nb_input_codecs-1] = avcodec_find_decoder_by_name(audio_codec_name);
             set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_AUDIO], AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM, input_codecs[nb_input_codecs-1]);
             channel_layout    = dec->channel_layout;
-            audio_channels    = dec->channels;
             audio_sample_fmt  = dec->sample_fmt;
             if(audio_disable)
                 st->discard= AVDISCARD_ALL;
@@ -3340,6 +3343,7 @@ static int opt_input_file(const char *opt, const char *filename)
 
     video_channel = 0;
     audio_sample_rate = 0;
+    audio_channels    = 0;
 
     av_freep(&video_codec_name);
     av_freep(&audio_codec_name);
@@ -3586,7 +3590,6 @@ static void new_audio_stream(AVFormatContext *oc, int file_idx)
     }
     if (audio_stream_copy) {
         st->stream_copy = 1;
-        audio_enc->channels = audio_channels;
     } else {
         audio_enc->codec_id = codec_id;
         set_context_opts(audio_enc, avcodec_opts[AVMEDIA_TYPE_AUDIO], AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, codec);
@@ -3595,13 +3598,12 @@ static void new_audio_stream(AVFormatContext *oc, int file_idx)
             audio_enc->flags |= CODEC_FLAG_QSCALE;
             audio_enc->global_quality = st->quality = FF_QP2LAMBDA * audio_qscale;
         }
-        audio_enc->channels = audio_channels;
+        if (audio_channels)
+            audio_enc->channels = audio_channels;
         audio_enc->sample_fmt = audio_sample_fmt;
         if (audio_sample_rate)
             audio_enc->sample_rate = audio_sample_rate;
         audio_enc->channel_layout = channel_layout;
-        if (av_get_channel_layout_nb_channels(channel_layout) != audio_channels)
-            audio_enc->channel_layout = 0;
         choose_sample_fmt(st, codec);
     }
     if (audio_language) {
@@ -3890,6 +3892,7 @@ static void opt_output_file(const char *filename)
     set_context_opts(oc, avformat_opts, AV_OPT_FLAG_ENCODING_PARAM, NULL);
 
     audio_sample_rate = 0;
+    audio_channels    = 0;
 
     av_freep(&forced_key_frames);
     uninit_opts();

From a6286bda0956bfe15b4e1a9f96e1689666e1d866 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Wed, 15 Jun 2011 08:00:03 +0200
Subject: [PATCH 828/830] ffmpeg: don't abuse a global for passing framerate
 from input to output

It's broken with multiple files or video streams.
---
 ffmpeg.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 3ed578902c..1b31d5655f 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -271,6 +271,7 @@ typedef struct AVOutputStream {
     int resample_height;
     int resample_width;
     int resample_pix_fmt;
+    AVRational frame_rate;
 
     float frame_aspect_ratio;
 
@@ -2226,6 +2227,14 @@ static int transcode(AVFormatContext **output_files,
                 ost->encoding_needed = 1;
                 ist->decoding_needed = 1;
 
+                if (!ost->frame_rate.num)
+                    ost->frame_rate = ist->st->r_frame_rate.num ? ist->st->r_frame_rate : (AVRational){25,1};
+                if (codec->codec && codec->codec->supported_framerates && !force_fps) {
+                    int idx = av_find_nearest_q_idx(ost->frame_rate, codec->codec->supported_framerates);
+                    ost->frame_rate = codec->codec->supported_framerates[idx];
+                }
+                codec->time_base = (AVRational){ost->frame_rate.den, ost->frame_rate.num};
+
 #if CONFIG_AVFILTER
                 if (configure_video_filters(ist, ost)) {
                     fprintf(stderr, "Error opening filters!\n");
@@ -3308,9 +3317,6 @@ static int opt_input_file(const char *opt, const char *filename)
 
                     (float)rfps / rfps_base, rfps, rfps_base);
             }
-            /* update the current frame rate to match the stream frame rate */
-            frame_rate.num = rfps;
-            frame_rate.den = rfps_base;
 
             if(video_disable)
                 st->discard= AVDISCARD_ALL;
@@ -3342,6 +3348,7 @@ static int opt_input_file(const char *opt, const char *filename)
     input_files[nb_input_files - 1].ist_index  = nb_input_streams - ic->nb_streams;
 
     video_channel = 0;
+    frame_rate    = (AVRational){0, 0};
     audio_sample_rate = 0;
     audio_channels    = 0;
 
@@ -3455,16 +3462,12 @@ static void new_video_stream(AVFormatContext *oc, int file_idx)
     } else {
         const char *p;
         int i;
-        AVRational fps= frame_rate.num ? frame_rate : (AVRational){25,1};
 
+        if (frame_rate.num)
+            ost->frame_rate = frame_rate;
         video_enc->codec_id = codec_id;
         set_context_opts(video_enc, avcodec_opts[AVMEDIA_TYPE_VIDEO], AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, codec);
 
-        if (codec && codec->supported_framerates && !force_fps)
-            fps = codec->supported_framerates[av_find_nearest_q_idx(fps, codec->supported_framerates)];
-        video_enc->time_base.den = fps.num;
-        video_enc->time_base.num = fps.den;
-
         video_enc->width = frame_width;
         video_enc->height = frame_height;
         video_enc->pix_fmt = frame_pix_fmt;
@@ -3891,6 +3894,7 @@ static void opt_output_file(const char *filename)
 
     set_context_opts(oc, avformat_opts, AV_OPT_FLAG_ENCODING_PARAM, NULL);
 
+    frame_rate    = (AVRational){0, 0};
     audio_sample_rate = 0;
     audio_channels    = 0;
 

From b203f65451646b1555d458a3601159f7d89a3397 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 14 Jun 2011 13:45:38 -0400
Subject: [PATCH 829/830] ac3enc: use correct alignment and length in channel
 coupling dsp functions.

This fixes a segfault when using the C version of ac3dsp.float_to_fixed24().
---
 libavcodec/ac3enc_template.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index 0547165aaf..f6248a82c9 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -134,36 +134,38 @@ void AC3_NAME(apply_channel_coupling)(AC3EncodeContext *s)
     LOCAL_ALIGNED_16(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
     int blk, ch, bnd, i, j;
     CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
-    int num_cpl_coefs = s->num_cpl_subbands * 12;
+    int cpl_start, num_cpl_coefs;
 
     memset(cpl_coords,       0, AC3_MAX_BLOCKS * sizeof(*cpl_coords));
     memset(fixed_cpl_coords, 0, AC3_MAX_BLOCKS * sizeof(*fixed_cpl_coords));
 
+    /* align start to 16-byte boundary. align length to multiple of 32.
+        note: coupling start bin % 4 will always be 1 */
+    cpl_start     = s->start_freq[CPL_CH] - 1;
+    num_cpl_coefs = FFALIGN(s->num_cpl_subbands * 12 + 1, 32);
+    cpl_start     = FFMIN(256, cpl_start + num_cpl_coefs) - num_cpl_coefs;
+
     /* calculate coupling channel from fbw channels */
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        CoefType *cpl_coef = &block->mdct_coef[CPL_CH][s->start_freq[CPL_CH]];
+        CoefType *cpl_coef = &block->mdct_coef[CPL_CH][cpl_start];
         if (!block->cpl_in_use)
             continue;
-        memset(cpl_coef-1, 0, (num_cpl_coefs+4) * sizeof(*cpl_coef));
+        memset(cpl_coef, 0, num_cpl_coefs * sizeof(*cpl_coef));
         for (ch = 1; ch <= s->fbw_channels; ch++) {
-            CoefType *ch_coef = &block->mdct_coef[ch][s->start_freq[CPL_CH]];
+            CoefType *ch_coef = &block->mdct_coef[ch][cpl_start];
             if (!block->channel_in_cpl[ch])
                 continue;
             for (i = 0; i < num_cpl_coefs; i++)
                 cpl_coef[i] += ch_coef[i];
         }
-        /* note: coupling start bin % 4 will always be 1 and num_cpl_coefs
-                 will always be a multiple of 12, so we need to subtract 1 from
-                 the start and add 4 to the length when using optimized
-                 functions which require 16-byte alignment. */
 
         /* coefficients must be clipped to +/- 1.0 in order to be encoded */
-        s->dsp.vector_clipf(cpl_coef-1, cpl_coef-1, -1.0f, 1.0f, num_cpl_coefs+4);
+        s->dsp.vector_clipf(cpl_coef, cpl_coef, -1.0f, 1.0f, num_cpl_coefs);
 
         /* scale coupling coefficients from float to 24-bit fixed-point */
-        s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][s->start_freq[CPL_CH]-1],
-                                   cpl_coef-1, num_cpl_coefs+4);
+        s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][cpl_start],
+                                   cpl_coef, num_cpl_coefs);
     }
 
     /* calculate energy in each band in coupling channel and each fbw channel */

From a0bafaabb0656ca3bb3591beba0de79f6153fdac Mon Sep 17 00:00:00 2001
From: Kirill Zorin <cyril.zorin@gmail.com>
Date: Wed, 15 Jun 2011 19:18:29 +0200
Subject: [PATCH 830/830] mmsh: fixed printf injection bug in mmsh request

---
 libavformat/mmsh.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/mmsh.c b/libavformat/mmsh.c
index 9b432d1fb5..64760e8555 100644
--- a/libavformat/mmsh.c
+++ b/libavformat/mmsh.c
@@ -231,7 +231,7 @@ static int mmsh_open(URLContext *h, const char *uri, int flags)
         host, sizeof(host), &port, path, sizeof(path), location);
     if (port<0)
         port = 80; // default mmsh protocol port
-    ff_url_join(httpname, sizeof(httpname), "http", NULL, host, port, path);
+    ff_url_join(httpname, sizeof(httpname), "http", NULL, host, port, "%s", path);
 
     if (ffurl_alloc(&mms->mms_hd, httpname, AVIO_FLAG_READ) < 0) {
         return AVERROR(EIO);