diff --git a/Makefile b/Makefile
index e77dbe6f2b..70d2190890 100644
--- a/Makefile
+++ b/Makefile
@@ -57,7 +57,7 @@ $(PROGS): %$(EXESUF): %_g$(EXESUF)
 	$(STRIP) $@
 
 $(TOOLS): %$(EXESUF): %.o
-	$(LD) $(LDFLAGS) -o $@ $< $(ELIBS)
+	$(LD) $(LDFLAGS) $(LD_O) $< $(ELIBS)
 
 tools/cws2fws$(EXESUF): ELIBS = -lz
 
@@ -102,7 +102,7 @@ endef
 $(foreach P,$(PROGS-yes),$(eval $(call DOPROG,$(P))))
 
 %$(PROGSSUF)_g$(EXESUF): %.o cmdutils.o $(FF_DEP_LIBS)
-	$(LD) $(LDFLAGS) -o $@ $(OBJS-$*) cmdutils.o $(FF_EXTRALIBS)
+	$(LD) $(LDFLAGS) $(LD_O) $(OBJS-$*) cmdutils.o $(FF_EXTRALIBS)
 
 OBJDIRS += tools
 
diff --git a/common.mak b/common.mak
index ef3305bc2b..b41f885fce 100644
--- a/common.mak
+++ b/common.mak
@@ -109,7 +109,7 @@ $(HOSTOBJS): %.o: %.c
 	$(call COMPILE,HOSTCC)
 
 $(HOSTPROGS): %$(HOSTEXESUF): %.o
-	$(HOSTCC) $(HOSTLDFLAGS) -o $@ $< $(HOSTLIBS)
+	$(HOSTCC) $(HOSTLDFLAGS) $(HOSTCC_O) $< $(HOSTLIBS)
 
 $(OBJS):     | $(sort $(dir $(OBJS)))
 $(HOSTOBJS): | $(sort $(dir $(HOSTOBJS)))
diff --git a/configure b/configure
index 51e97aad0a..48e307c5f1 100755
--- a/configure
+++ b/configure
@@ -709,11 +709,15 @@ check_cpp(){
     check_cmd $cc $CPPFLAGS $CFLAGS "$@" $(cc_e $TMPO) $TMPC
 }
 
+as_o(){
+    eval printf '%s\\n' $AS_O
+}
+
 check_as(){
     log check_as "$@"
     cat > $TMPC
     log_file $TMPC
-    check_cmd $as $CPPFLAGS $ASFLAGS "$@" $AS_C -o $TMPO $TMPC
+    check_cmd $as $CPPFLAGS $ASFLAGS "$@" $AS_C $(as_o $TMPO) $TMPC
 }
 
 check_inline_asm(){
@@ -735,6 +739,10 @@ check_yasm(){
     check_cmd $yasmexe $YASMFLAGS "$@" -o $TMPO $TMPS
 }
 
+ld_o(){
+    eval printf '%s\\n' $LD_O
+}
+
 check_ld(){
     log check_ld "$@"
     type=$1
@@ -747,7 +755,7 @@ check_ld(){
     check_$type $($cflags_filter $flags) || return
     flags=$($ldflags_filter $flags)
     libs=$($ldflags_filter $libs)
-    check_cmd $ld $LDFLAGS $flags -o $TMPE $TMPO $libs $extralibs
+    check_cmd $ld $LDFLAGS $flags $(ld_o $TMPE) $TMPO $libs $extralibs
 }
 
 check_code(){
@@ -1004,11 +1012,15 @@ require_pkg_config(){
     add_extralibs $(get_safe ${pkg}_libs)
 }
 
+hostcc_o(){
+    eval printf '%s\\n' $HOSTCC_O
+}
+
 check_host_cc(){
     log check_host_cc "$@"
     cat > $TMPC
     log_file $TMPC
-    check_cmd $host_cc $host_cflags "$@" -c -o $TMPO $TMPC
+    check_cmd $host_cc $host_cflags "$@" $HOSTCC_C $(hostcc_o $TMPO) $TMPC
 }
 
 check_host_cflags(){
@@ -2887,6 +2899,7 @@ case $target_os in
         disable symver
         SHFLAGS='-shared'
         SLIBNAME_WITH_MAJOR='$(SLIBNAME).$(LIBVERSION)'
+        SLIB_INSTALL_LINKS=
         oss_indev_extralibs="-lossaudio"
         oss_outdev_extralibs="-lossaudio"
         ;;
diff --git a/ffprobe.c b/ffprobe.c
index 196ee40280..ee32607992 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -2100,26 +2100,26 @@ static int opt_show_versions(const char *opt, const char *arg)
 
 static const OptionDef real_options[] = {
 #include "cmdutils_common_opts.h"
-    { "f", HAS_ARG, {(void*)opt_format}, "force format", "format" },
-    { "unit", OPT_BOOL, {(void*)&show_value_unit}, "show unit of the displayed values" },
-    { "prefix", OPT_BOOL, {(void*)&use_value_prefix}, "use SI prefixes for the displayed values" },
-    { "byte_binary_prefix", OPT_BOOL, {(void*)&use_byte_value_binary_prefix},
+    { "f", HAS_ARG, {.func_arg = opt_format}, "force format", "format" },
+    { "unit", OPT_BOOL, {&show_value_unit}, "show unit of the displayed values" },
+    { "prefix", OPT_BOOL, {&use_value_prefix}, "use SI prefixes for the displayed values" },
+    { "byte_binary_prefix", OPT_BOOL, {&use_byte_value_binary_prefix},
       "use binary prefixes for byte units" },
-    { "sexagesimal", OPT_BOOL,  {(void*)&use_value_sexagesimal_format},
+    { "sexagesimal", OPT_BOOL,  {&use_value_sexagesimal_format},
       "use sexagesimal format HOURS:MM:SS.MICROSECONDS for time units" },
-    { "pretty", 0, {(void*)&opt_pretty},
+    { "pretty", 0, {.func_arg = opt_pretty},
       "prettify the format of displayed values, make it more human readable" },
     { "print_format", OPT_STRING | HAS_ARG, {(void*)&print_format},
       "set the output printing format (available formats are: default, compact, csv, flat, ini, json, xml)", "format" },
     { "of", OPT_STRING | HAS_ARG, {(void*)&print_format}, "alias for -print_format", "format" },
     { "show_data",    OPT_BOOL, {(void*)&do_show_data}, "show packets data" },
     { "show_error",   OPT_BOOL, {(void*)&do_show_error} ,  "show probing error" },
-    { "show_format",  OPT_BOOL, {(void*)&do_show_format} , "show format/container info" },
+    { "show_format",  OPT_BOOL, {&do_show_format} , "show format/container info" },
     { "show_frames",  OPT_BOOL, {(void*)&do_show_frames} , "show frames info" },
-    { "show_format_entry", HAS_ARG, {(void*)opt_show_format_entry},
+    { "show_format_entry", HAS_ARG, {.func_arg = opt_show_format_entry},
       "show a particular entry from the format/container info", "entry" },
-    { "show_packets", OPT_BOOL, {(void*)&do_show_packets}, "show packets info" },
-    { "show_streams", OPT_BOOL, {(void*)&do_show_streams}, "show streams info" },
+    { "show_packets", OPT_BOOL, {&do_show_packets}, "show packets info" },
+    { "show_streams", OPT_BOOL, {&do_show_streams}, "show streams info" },
     { "count_frames", OPT_BOOL, {(void*)&do_count_frames}, "count the number of frames per stream" },
     { "count_packets", OPT_BOOL, {(void*)&do_count_packets}, "count the number of packets per stream" },
     { "show_program_version",  OPT_BOOL, {(void*)&do_show_program_version},  "show ffprobe version" },
@@ -2127,7 +2127,7 @@ static const OptionDef real_options[] = {
     { "show_versions",         0, {(void*)&opt_show_versions}, "show program and library versions" },
     { "show_private_data", OPT_BOOL, {(void*)&show_private_data}, "show private data" },
     { "private",           OPT_BOOL, {(void*)&show_private_data}, "same as show_private_data" },
-    { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
+    { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {.func_arg = opt_default}, "generic catch all option", "" },
     { "i", HAS_ARG, {(void *)opt_input_file}, "read specified file", "input_file"},
     { NULL, },
 };
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 6eb00800aa..c3de2cbfbe 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -194,7 +194,7 @@ OBJS-$(CONFIG_FOURXM_DECODER)          += 4xm.o
 OBJS-$(CONFIG_FRAPS_DECODER)           += fraps.o
 OBJS-$(CONFIG_FRWU_DECODER)            += frwu.o
 OBJS-$(CONFIG_G723_1_DECODER)          += g723_1.o acelp_vectors.o \
-                                          celp_filters.o celp_math.o
+                                          celp_filters.o
 OBJS-$(CONFIG_G723_1_ENCODER)          += g723_1.o acelp_vectors.o celp_math.o
 OBJS-$(CONFIG_G729_DECODER)            += g729dec.o lsp.o celp_math.o acelp_filters.o acelp_pitch_delay.o acelp_vectors.o g729postfilter.o
 OBJS-$(CONFIG_GIF_DECODER)             += gifdec.o lzw.o
@@ -307,7 +307,7 @@ OBJS-$(CONFIG_MSMPEG4V3_ENCODER)       += msmpeg4.o msmpeg4enc.o msmpeg4data.o \
                                           mpeg4videodec.o
 OBJS-$(CONFIG_MSRLE_DECODER)           += msrle.o msrledec.o
 OBJS-$(CONFIG_MSA1_DECODER)            += mss3.o mss34dsp.o
-OBJS-$(CONFIG_MSS1_DECODER)            += mss1.o
+OBJS-$(CONFIG_MSS1_DECODER)            += mss1.o mss12.o
 OBJS-$(CONFIG_MSVIDEO1_DECODER)        += msvideo1.o
 OBJS-$(CONFIG_MSVIDEO1_ENCODER)        += msvideo1enc.o elbg.o
 OBJS-$(CONFIG_MSZH_DECODER)            += lcldec.o
@@ -341,7 +341,7 @@ OBJS-$(CONFIG_PRORES_ENCODER)          += proresenc_anatoliy.o
 OBJS-$(CONFIG_PRORES_ANATOLIY_ENCODER) += proresenc_anatoliy.o
 OBJS-$(CONFIG_PRORES_KOSTYA_ENCODER)   += proresenc_kostya.o proresdata.o proresdsp.o
 OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
-OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o celp_math.o         \
+OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o                     \
                                           celp_filters.o acelp_vectors.o \
                                           acelp_filters.o
 OBJS-$(CONFIG_QDM2_DECODER)            += qdm2.o
@@ -356,7 +356,7 @@ OBJS-$(CONFIG_R210_ENCODER)            += r210enc.o
 OBJS-$(CONFIG_RA_144_DECODER)          += ra144dec.o ra144.o celp_filters.o
 OBJS-$(CONFIG_RA_144_ENCODER)          += ra144enc.o ra144.o celp_filters.o \
                                           audio_frame_queue.o
-OBJS-$(CONFIG_RA_288_DECODER)          += ra288.o celp_math.o celp_filters.o
+OBJS-$(CONFIG_RA_288_DECODER)          += ra288.o celp_filters.o
 OBJS-$(CONFIG_RALF_DECODER)            += ralf.o
 OBJS-$(CONFIG_RAWVIDEO_DECODER)        += rawdec.o
 OBJS-$(CONFIG_RAWVIDEO_ENCODER)        += rawenc.o
@@ -423,7 +423,7 @@ OBJS-$(CONFIG_TRUESPEECH_DECODER)      += truespeech.o
 OBJS-$(CONFIG_TSCC_DECODER)            += tscc.o msrledec.o
 OBJS-$(CONFIG_TSCC2_DECODER)           += tscc2.o
 OBJS-$(CONFIG_TTA_DECODER)             += tta.o
-OBJS-$(CONFIG_TWINVQ_DECODER)          += twinvq.o celp_math.o
+OBJS-$(CONFIG_TWINVQ_DECODER)          += twinvq.o
 OBJS-$(CONFIG_TXD_DECODER)             += txd.o s3tc.o
 OBJS-$(CONFIG_ULTI_DECODER)            += ulti.o
 OBJS-$(CONFIG_UTVIDEO_DECODER)         += utvideodec.o utvideo.o
@@ -468,7 +468,7 @@ OBJS-$(CONFIG_WMAV1_ENCODER)           += wmaenc.o wma.o wma_common.o aactab.o
 OBJS-$(CONFIG_WMAV2_DECODER)           += wmadec.o wma.o wma_common.o aactab.o
 OBJS-$(CONFIG_WMAV2_ENCODER)           += wmaenc.o wma.o wma_common.o aactab.o
 OBJS-$(CONFIG_WMAVOICE_DECODER)        += wmavoice.o \
-                                          celp_math.o celp_filters.o \
+                                          celp_filters.o \
                                           acelp_vectors.o acelp_filters.o
 OBJS-$(CONFIG_WMV1_DECODER)            += msmpeg4.o msmpeg4data.o
 OBJS-$(CONFIG_WMV2_DECODER)            += wmv2dec.o wmv2.o        \
diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c
index 48230e013c..6c5293ac8a 100644
--- a/libavcodec/acelp_pitch_delay.c
+++ b/libavcodec/acelp_pitch_delay.c
@@ -131,7 +131,7 @@ float ff_amr_set_fixed_gain(float fixed_gain_factor, float fixed_mean_energy,
     // Note 10^(0.05 * -10log(average x2)) = 1/sqrt((average x2)).
     float val = fixed_gain_factor *
         exp2f(M_LOG2_10 * 0.05 *
-              (ff_dot_productf(pred_table, prediction_error, 4) +
+              (ff_scalarproduct_float_c(pred_table, prediction_error, 4) +
                energy_mean)) /
         sqrtf(fixed_mean_energy);
 
diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c
index 704001982e..aadacb4283 100644
--- a/libavcodec/acelp_vectors.c
+++ b/libavcodec/acelp_vectors.c
@@ -24,8 +24,8 @@
 
 #include "libavutil/common.h"
 #include "avcodec.h"
+#include "dsputil.h"
 #include "acelp_vectors.h"
-#include "celp_math.h"
 
 const uint8_t ff_fc_2pulses_9bits_track1[16] =
 {
@@ -203,7 +203,7 @@ void ff_adaptive_gain_control(float *out, const float *in, float speech_energ,
                               int size, float alpha, float *gain_mem)
 {
     int i;
-    float postfilter_energ = ff_dot_productf(in, in, size);
+    float postfilter_energ = ff_scalarproduct_float_c(in, in, size);
     float gain_scale_factor = 1.0;
     float mem = *gain_mem;
 
@@ -224,7 +224,7 @@ void ff_scale_vector_to_given_sum_of_squares(float *out, const float *in,
                                              float sum_of_squares, const int n)
 {
     int i;
-    float scalefactor = ff_dot_productf(in, in, n);
+    float scalefactor = ff_scalarproduct_float_c(in, in, n);
     if (scalefactor)
         scalefactor = sqrt(sum_of_squares / scalefactor);
     for (i = 0; i < n; i++)
diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c
index 04c921c05b..b6b93dce9c 100644
--- a/libavcodec/amrnbdec.c
+++ b/libavcodec/amrnbdec.c
@@ -44,6 +44,7 @@
 #include <math.h>
 
 #include "avcodec.h"
+#include "dsputil.h"
 #include "libavutil/common.h"
 #include "libavutil/avassert.h"
 #include "celp_math.h"
@@ -798,7 +799,7 @@ static int synthesis(AMRContext *p, float *lpc,
     // emphasize pitch vector contribution
     if (p->pitch_gain[4] > 0.5 && !overflow) {
         float energy = p->celpm_ctx.dot_productf(excitation, excitation,
-                                       AMR_SUBFRAME_SIZE);
+                                                AMR_SUBFRAME_SIZE);
         float pitch_factor =
             p->pitch_gain[4] *
             (p->cur_frame_mode == MODE_12k2 ?
@@ -899,7 +900,7 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out)
     float *samples          = p->samples_in + LP_FILTER_ORDER; // Start of input
 
     float speech_gain       = p->celpm_ctx.dot_productf(samples, samples,
-                                              AMR_SUBFRAME_SIZE);
+                                                       AMR_SUBFRAME_SIZE);
 
     float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER];  // Output of pole filter
     const float *gamma_n, *gamma_d;                       // Formant filter factor table
@@ -1005,8 +1006,10 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
 
         p->fixed_gain[4] =
             ff_amr_set_fixed_gain(fixed_gain_factor,
-                       p->celpm_ctx.dot_productf(p->fixed_vector, p->fixed_vector,
-                                       AMR_SUBFRAME_SIZE)/AMR_SUBFRAME_SIZE,
+                       p->celpm_ctx.dot_productf(p->fixed_vector,
+                                                           p->fixed_vector,
+                                                           AMR_SUBFRAME_SIZE) /
+                                  AMR_SUBFRAME_SIZE,
                        p->prediction_error,
                        energy_mean[p->cur_frame_mode], energy_pred_fac);
 
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 43f2044b8f..738dd98576 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -28,9 +28,10 @@
 #include "libavutil/lfg.h"
 
 #include "avcodec.h"
+#include "dsputil.h"
 #include "lsp.h"
-#include "celp_math.h"
 #include "celp_filters.h"
+#include "celp_math.h"
 #include "acelp_filters.h"
 #include "acelp_vectors.h"
 #include "acelp_pitch_delay.h"
@@ -600,9 +601,11 @@ static float voice_factor(float *p_vector, float p_gain,
                           CELPMContext *ctx)
 {
     double p_ener = (double) ctx->dot_productf(p_vector, p_vector,
-                                             AMRWB_SFR_SIZE) * p_gain * p_gain;
+                                             AMRWB_SFR_SIZE) *
+                                             p_gain * p_gain;
     double f_ener = (double) ctx->dot_productf(f_vector, f_vector,
-                                             AMRWB_SFR_SIZE) * f_gain * f_gain;
+                                             AMRWB_SFR_SIZE) *
+                                             f_gain * f_gain;
 
     return (p_ener - f_ener) / (p_ener + f_ener);
 }
@@ -771,7 +774,7 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
     if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) {
         int i;
         float energy = ctx->celpm_ctx.dot_productf(excitation, excitation,
-                                       AMRWB_SFR_SIZE);
+                                                AMRWB_SFR_SIZE);
 
         // XXX: Weird part in both ref code and spec. A unknown parameter
         // {beta} seems to be identical to the current pitch gain
@@ -832,8 +835,8 @@ static void upsample_5_4(float *out, const float *in, int o_size, CELPMContext *
 
         for (k = 1; k < 5; k++) {
             out[i] = ctx->dot_productf(in0 + int_part,
-                                     upsample_fir[4 - frac_part],
-                                     UPS_MEM_SIZE);
+                                              upsample_fir[4 - frac_part],
+                                              UPS_MEM_SIZE);
             int_part++;
             frac_part--;
             i++;
@@ -1175,8 +1178,10 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
 
         ctx->fixed_gain[0] =
             ff_amr_set_fixed_gain(fixed_gain_factor,
-                       ctx->celpm_ctx.dot_productf(ctx->fixed_vector, ctx->fixed_vector,
-                                       AMRWB_SFR_SIZE) / AMRWB_SFR_SIZE,
+                                  ctx->celpm_ctx.dot_productf(ctx->fixed_vector,
+                                                           ctx->fixed_vector,
+                                                           AMRWB_SFR_SIZE) /
+                                  AMRWB_SFR_SIZE,
                        ctx->prediction_error,
                        ENERGY_MEAN, energy_pred_fac);
 
diff --git a/libavcodec/celp_math.c b/libavcodec/celp_math.c
index e9bb0d3892..56dc86379e 100644
--- a/libavcodec/celp_math.c
+++ b/libavcodec/celp_math.c
@@ -29,97 +29,6 @@
 #include "celp_math.h"
 #include "libavutil/common.h"
 
-#ifdef G729_BITEXACT
-/**
- * Cosine table: base_cos[i] = (1<<15) * cos(i*PI/64)
- */
-static const int16_t base_cos[64] =
-{
-  32767,  32729,  32610,  32413,  32138,  31786,  31357,  30853,
-  30274,  29622,  28899,  28106,  27246,  26320,  25330,  24279,
-  23170,  22006,  20788,  19520,  18205,  16846,  15447,  14010,
-  12540,  11039,   9512,   7962,   6393,   4808,   3212,   1608,
-      0,  -1608,  -3212,  -4808,  -6393,  -7962,  -9512, -11039,
- -12540, -14010, -15447, -16846, -18205, -19520, -20788, -22006,
- -23170, -24279, -25330, -26320, -27246, -28106, -28899, -29622,
- -30274, -30853, -31357, -31786, -32138, -32413, -32610, -32729
-};
-
-/**
- * Slope used to compute cos(x)
- *
- * cos(ind*64+offset) = base_cos[ind]+offset*slope_cos[ind]
- * values multiplied by 1<<19
- */
-static const int16_t slope_cos[64] =
-{
-   -632,  -1893,  -3150,  -4399,  -5638,  -6863,  -8072,  -9261,
- -10428, -11570, -12684, -13767, -14817, -15832, -16808, -17744,
- -18637, -19486, -20287, -21039, -21741, -22390, -22986, -23526,
- -24009, -24435, -24801, -25108, -25354, -25540, -25664, -25726,
- -25726, -25664, -25540, -25354, -25108, -24801, -24435, -24009,
- -23526, -22986, -22390, -21741, -21039, -20287, -19486, -18637,
- -17744, -16808, -15832, -14817, -13767, -12684, -11570, -10428,
-  -9261,  -8072,  -6863,  -5638,  -4399,  -3150,  -1893,   -632
-};
-
-/**
- * Table used to compute exp2(x)
- *
- * tab_exp2[i] = (1<<14) * exp2(i/32) = 2^(i/32) i=0..32
- */
-static const uint16_t tab_exp2[33] =
-{
-  16384, 16743, 17109, 17484, 17867, 18258, 18658, 19066, 19484, 19911,
-  20347, 20792, 21247, 21713, 22188, 22674, 23170, 23678, 24196, 24726,
-  25268, 25821, 26386, 26964, 27554, 28158, 28774, 29405, 30048, 30706,
-  31379, 32066, 32767
-};
-
-int16_t ff_cos(uint16_t arg)
-{
-    uint8_t offset= arg;
-    uint8_t ind = arg >> 8;
-
-    av_assert2(arg < 0x4000);
-
-    return FFMAX(base_cos[ind] + ((slope_cos[ind] * offset) >> 12), -0x8000);
-}
-
-int ff_exp2(uint16_t power)
-{
-    uint16_t frac_x0;
-    uint16_t frac_dx;
-    int result;
-
-    av_assert2(power <= 0x7fff);
-
-    frac_x0 = power >> 10;
-    frac_dx = (power & 0x03ff) << 5;
-
-    result = tab_exp2[frac_x0] << 15;
-    result += frac_dx * (tab_exp2[frac_x0+1] - tab_exp2[frac_x0]);
-
-    return result >> 10;
-}
-
-#else // G729_BITEXACT
-
-/**
- * Cosine table: base_cos[i] = (1<<15) * cos(i*PI/64)
- */
-static const int16_t tab_cos[65] =
-{
-  32767,  32738,  32617,  32421,  32145,  31793,  31364,  30860,
-  30280,  29629,  28905,  28113,  27252,  26326,  25336,  24285,
-  23176,  22011,  20793,  19525,  18210,  16851,  15451,  14014,
-  12543,  11043,   9515,   7965,   6395,   4810,   3214,   1609,
-      1,  -1607,  -3211,  -4808,  -6393,  -7962,  -9513, -11040,
- -12541, -14012, -15449, -16848, -18207, -19523, -20791, -22009,
- -23174, -24283, -25334, -26324, -27250, -28111, -28904, -29627,
- -30279, -30858, -31363, -31792, -32144, -32419, -32616, -32736, -32768,
-};
-
 static const uint16_t exp2a[]=
 {
      0,  1435,  2901,  4400,  5931,  7496,  9096, 10730,
@@ -136,16 +45,6 @@ static const uint16_t exp2b[]=
  17176, 17898, 18620, 19343, 20066, 20790, 21514, 22238,
 };
 
-int16_t ff_cos(uint16_t arg)
-{
-    uint8_t offset= arg;
-    uint8_t ind = arg >> 8;
-
-    av_assert2(arg <= 0x3fff);
-
-    return tab_cos[ind] + (offset * (tab_cos[ind+1] - tab_cos[ind]) >> 8);
-}
-
 int ff_exp2(uint16_t power)
 {
     unsigned int result= exp2a[power>>10] + 0x10000;
@@ -156,8 +55,6 @@ int ff_exp2(uint16_t power)
     return result + ((result*(power&31)*89)>>22);
 }
 
-#endif // else G729_BITEXACT
-
 /**
  * Table used to compute log2(x)
  *
diff --git a/libavcodec/celp_math.h b/libavcodec/celp_math.h
index 16cc19ccd7..f12f67880b 100644
--- a/libavcodec/celp_math.h
+++ b/libavcodec/celp_math.h
@@ -44,14 +44,6 @@ typedef struct CELPMContext {
 void ff_celp_math_init(CELPMContext *c);
 void ff_celp_math_init_mips(CELPMContext *c);
 
-/**
- * fixed-point implementation of cosine in [0; PI) domain.
- * @param arg fixed-point cosine argument, 0 <= arg < 0x4000
- *
- * @return value of (1<<15) * cos(arg * PI / (1<<14)), -0x8000 <= result <= 0x7fff
- */
-int16_t ff_cos(uint16_t arg);
-
 /**
  * fixed-point implementation of exp2(x) in [0; 1] domain.
  * @param power argument to exp2, 0 <= power <= 0x7fff
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 3c9dd47c2f..e30d93ab80 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -374,8 +374,8 @@ static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
 }
 
 
-void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
-                             int line_size)
+static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
 {
     int i;
 
@@ -427,9 +427,9 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
     }
 }
 
-void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
-                                    uint8_t *restrict pixels,
-                                    int line_size)
+static void put_signed_pixels_clamped_c(const DCTELEM *block,
+                                        uint8_t *restrict pixels,
+                                        int line_size)
 {
     int i, j;
 
@@ -448,8 +448,8 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
     }
 }
 
-void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
-                             int line_size)
+static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
 {
     int i;
 
@@ -2546,7 +2546,7 @@ static void butterflies_float_interleave_c(float *dst, const float *src0,
     }
 }
 
-static float scalarproduct_float_c(const float *v1, const float *v2, int len)
+float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
 {
     float p = 0.0;
     int i;
@@ -2728,22 +2728,22 @@ void ff_wmv2_idct_c(short * block){
 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_wmv2_idct_c(block);
-    ff_put_pixels_clamped_c(block, dest, line_size);
+    put_pixels_clamped_c(block, dest, line_size);
 }
 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_wmv2_idct_c(block);
-    ff_add_pixels_clamped_c(block, dest, line_size);
+    add_pixels_clamped_c(block, dest, line_size);
 }
 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_j_rev_dct (block);
-    ff_put_pixels_clamped_c(block, dest, line_size);
+    put_pixels_clamped_c(block, dest, line_size);
 }
 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_j_rev_dct (block);
-    ff_add_pixels_clamped_c(block, dest, line_size);
+    add_pixels_clamped_c(block, dest, line_size);
 }
 
 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
@@ -2890,9 +2890,9 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
     }
 
     c->diff_pixels = diff_pixels_c;
-    c->put_pixels_clamped = ff_put_pixels_clamped_c;
-    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
-    c->add_pixels_clamped = ff_add_pixels_clamped_c;
+    c->put_pixels_clamped = put_pixels_clamped_c;
+    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
+    c->add_pixels_clamped = add_pixels_clamped_c;
     c->sum_abs_dctelem = sum_abs_dctelem_c;
     c->gmc1 = gmc1_c;
     c->gmc = ff_gmc_c;
@@ -3047,7 +3047,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
     c->apply_window_int16 = apply_window_int16_c;
     c->vector_clip_int32 = vector_clip_int32_c;
-    c->scalarproduct_float = scalarproduct_float_c;
+    c->scalarproduct_float = ff_scalarproduct_float_c;
     c->butterflies_float = butterflies_float_c;
     c->butterflies_float_interleave = butterflies_float_interleave_c;
     c->vector_fmul_scalar = vector_fmul_scalar_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 42e1c9e61e..f9f6c89a5e 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -206,10 +206,6 @@ EMULATED_EDGE(10)
 EMULATED_EDGE(12)
 EMULATED_EDGE(14)
 
-void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
-void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
-void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
-
 /**
  * DSPContext.
  */
@@ -560,6 +556,17 @@ attribute_deprecated void dsputil_init(DSPContext* c, AVCodecContext *avctx);
 
 int ff_check_alignment(void);
 
+/**
+ * Return the scalar product of two vectors.
+ *
+ * @param v1  first input vector
+ * @param v2  first input vector
+ * @param len number of elements
+ *
+ * @return sum of elementwise products
+ */
+float ff_scalarproduct_float_c(const float *v1, const float *v2, int len);
+
 /**
  * permute block according to permuatation.
  * @param last last non zero element in scantable order
diff --git a/libavcodec/lsp.c b/libavcodec/lsp.c
index c2d0c6dad4..62ac92fbe1 100644
--- a/libavcodec/lsp.c
+++ b/libavcodec/lsp.c
@@ -27,11 +27,9 @@
 #define FRAC_BITS 14
 #include "mathops.h"
 #include "lsp.h"
-#include "celp_math.h"
 #include "libavcodec/mips/lsp_mips.h"
 #include "libavutil/avassert.h"
 
-
 void ff_acelp_reorder_lsf(int16_t* lsfq, int lsfq_min_distance, int lsfq_min, int lsfq_max, int lp_order)
 {
     int i, j;
@@ -58,6 +56,30 @@ void ff_set_min_dist_lsf(float *lsf, double min_spacing, int size)
         prev = lsf[i] = FFMAX(lsf[i], prev + min_spacing);
 }
 
+
+/* Cosine table: base_cos[i] = (1 << 15) * cos(i * PI / 64) */
+static const int16_t tab_cos[65] =
+{
+  32767,  32738,  32617,  32421,  32145,  31793,  31364,  30860,
+  30280,  29629,  28905,  28113,  27252,  26326,  25336,  24285,
+  23176,  22011,  20793,  19525,  18210,  16851,  15451,  14014,
+  12543,  11043,   9515,   7965,   6395,   4810,   3214,   1609,
+      1,  -1607,  -3211,  -4808,  -6393,  -7962,  -9513, -11040,
+ -12541, -14012, -15449, -16848, -18207, -19523, -20791, -22009,
+ -23174, -24283, -25334, -26324, -27250, -28111, -28904, -29627,
+ -30279, -30858, -31363, -31792, -32144, -32419, -32616, -32736, -32768,
+};
+
+static int16_t ff_cos(uint16_t arg)
+{
+    uint8_t offset= arg;
+    uint8_t ind = arg >> 8;
+
+    assert(arg <= 0x3fff);
+
+    return tab_cos[ind] + (offset * (tab_cos[ind+1] - tab_cos[ind]) >> 8);
+}
+
 void ff_acelp_lsf2lsp(int16_t *lsp, const int16_t *lsf, int lp_order)
 {
     int i;
diff --git a/libavcodec/mss1.c b/libavcodec/mss1.c
index d2699a4477..7b4dbbbaa9 100644
--- a/libavcodec/mss1.c
+++ b/libavcodec/mss1.c
@@ -27,73 +27,13 @@
 #include "libavutil/intfloat.h"
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
-#include "get_bits.h"
-
-enum SplitMode {
-    SPLIT_VERT = 0,
-    SPLIT_HOR,
-    SPLIT_NONE
-};
-
-typedef struct ArithCoder {
-    int low, high, value;
-    GetBitContext *gb;
-} ArithCoder;
-
-#define MODEL_MIN_SYMS    2
-#define MODEL_MAX_SYMS  256
-#define THRESH_ADAPTIVE  -1
-#define THRESH_LOW       15
-#define THRESH_HIGH      50
-
-typedef struct Model {
-    int cum_prob[MODEL_MAX_SYMS + 1];
-    int weights[MODEL_MAX_SYMS + 1];
-    int idx2sym[MODEL_MAX_SYMS + 1];
-    int sym2idx[MODEL_MAX_SYMS + 1];
-    int num_syms;
-    int thr_weight, threshold;
-} Model;
-
-static const int sec_order_sizes[4] = { 1, 7, 6, 1 };
-
-enum ContextDirection {
-    TOP_LEFT = 0,
-    TOP,
-    TOP_RIGHT,
-    LEFT
-};
-
-typedef struct PixContext {
-    int cache_size, num_syms;
-    uint8_t cache[12];
-    Model cache_model, full_model;
-    Model sec_models[4][8][4];
-} PixContext;
+#include "mss12.h"
 
 typedef struct MSS1Context {
-    AVCodecContext *avctx;
+    MSS12Context   ctx;
     AVFrame        pic;
-    uint8_t        *pic_start;
-    int            pic_stride;
-    uint8_t        *mask;
-    int            mask_linesize;
-    uint32_t       pal[256];
-    int            free_colours;
-    Model          intra_region, inter_region;
-    Model          pivot, edge_mode, split_mode;
-    PixContext     intra_pix_ctx, inter_pix_ctx;
-    int            corrupted;
 } MSS1Context;
 
-static void arith_init(ArithCoder *c, GetBitContext *gb)
-{
-    c->low   = 0;
-    c->high  = 0xFFFF;
-    c->value = get_bits(gb, 16);
-    c->gb    = gb;
-}
-
 static void arith_normalise(ArithCoder *c)
 {
     for (;;) {
@@ -178,88 +118,6 @@ static int arith_get_prob(ArithCoder *c, int *probs)
     return sym;
 }
 
-static int model_calc_threshold(Model *m)
-{
-    int thr;
-
-    if (m->thr_weight == -1) {
-        thr = 2 * m->weights[m->num_syms] - 1;
-        thr = ((thr >> 1) + 4 * m->cum_prob[0]) / thr;
-    } else {
-        thr = m->num_syms * m->thr_weight;
-    }
-
-    return FFMIN(thr, 0x3FFF);
-}
-
-static void model_reset(Model *m)
-{
-    int i;
-
-    for (i = 0; i <= m->num_syms; i++) {
-        m->weights[i]  = 1;
-        m->cum_prob[i] = m->num_syms - i;
-    }
-    m->weights[0]           = -1;
-    m->idx2sym[0]           = -1;
-    m->sym2idx[m->num_syms] = -1;
-    for (i = 0; i < m->num_syms; i++) {
-        m->sym2idx[i]     = i + 1;
-        m->idx2sym[i + 1] = i;
-    }
-}
-
-static av_cold void model_init(Model *m, int num_syms, int thr_weight)
-{
-    m->num_syms   = num_syms;
-    m->thr_weight = thr_weight;
-    m->threshold  = model_calc_threshold(m);
-    model_reset(m);
-}
-
-static void model_rescale_weights(Model *m)
-{
-    int i;
-    int cum_prob;
-
-    if (m->thr_weight == -1)
-        m->threshold = model_calc_threshold(m);
-    while (m->cum_prob[0] > m->threshold) {
-        cum_prob = 0;
-        for (i = m->num_syms; i >= 0; i--) {
-            m->cum_prob[i] = cum_prob;
-            m->weights[i]  = (m->weights[i] + 1) >> 1;
-            cum_prob      += m->weights[i];
-        }
-    }
-}
-
-static void model_update(Model *m, int val)
-{
-    int i;
-
-    if (m->weights[val] == m->weights[val - 1]) {
-        for (i = val; m->weights[i - 1] == m->weights[val]; i--);
-        if (i != val) {
-            int sym1, sym2;
-
-            sym1 = m->idx2sym[val];
-            sym2 = m->idx2sym[i];
-
-            m->idx2sym[val]  = sym2;
-            m->idx2sym[i]    = sym1;
-            m->sym2idx[sym1] = i;
-            m->sym2idx[sym2] = val;
-
-            val = i;
-        }
-    }
-    m->weights[val]++;
-    for (i = val - 1; i >= 0; i--)
-        m->cum_prob[i]++;
-    model_rescale_weights(m);
-}
-
 static int arith_get_model_sym(ArithCoder *c, Model *m)
 {
     int idx, val;
@@ -267,289 +125,33 @@ static int arith_get_model_sym(ArithCoder *c, Model *m)
     idx = arith_get_prob(c, m->cum_prob);
 
     val = m->idx2sym[idx];
-    model_update(m, idx);
+    ff_mss12_model_update(m, idx);
 
     arith_normalise(c);
 
     return val;
 }
 
-static void pixctx_reset(PixContext *ctx)
+static void arith_init(ArithCoder *c, GetBitContext *gb)
 {
-    int i, j, k;
+    c->low   = 0;
+    c->high  = 0xFFFF;
+    c->value = get_bits(gb, 16);
+    c->gb    = gb;
 
-    for (i = 0; i < ctx->cache_size; i++)
-        ctx->cache[i] = i;
-
-    model_reset(&ctx->cache_model);
-    model_reset(&ctx->full_model);
-
-    for (i = 0; i < 4; i++)
-        for (j = 0; j < sec_order_sizes[i]; j++)
-            for (k = 0; k < 4; k++)
-                model_reset(&ctx->sec_models[i][j][k]);
-}
-
-static av_cold void pixctx_init(PixContext *ctx, int cache_size)
-{
-    int i, j, k;
-
-    ctx->cache_size = cache_size + 4;
-    ctx->num_syms   = cache_size;
-
-    for (i = 0; i < ctx->cache_size; i++)
-        ctx->cache[i] = i;
-
-    model_init(&ctx->cache_model, ctx->num_syms + 1, THRESH_LOW);
-    model_init(&ctx->full_model, 256, THRESH_HIGH);
-
-    for (i = 0; i < 4; i++) {
-        for (j = 0; j < sec_order_sizes[i]; j++) {
-            for (k = 0; k < 4; k++) {
-                model_init(&ctx->sec_models[i][j][k], 2 + i,
-                           i ? THRESH_LOW : THRESH_ADAPTIVE);
-            }
-        }
-    }
-}
-
-static int decode_top_left_pixel(ArithCoder *acoder, PixContext *pctx)
-{
-    int i, val, pix;
-
-    val = arith_get_model_sym(acoder, &pctx->cache_model);
-    if (val < pctx->num_syms) {
-        pix = pctx->cache[val];
-    } else {
-        pix = arith_get_model_sym(acoder, &pctx->full_model);
-        for (i = 0; i < pctx->cache_size - 1; i++)
-            if (pctx->cache[i] == pix)
-                break;
-        val = i;
-    }
-    if (val) {
-        for (i = val; i > 0; i--)
-            pctx->cache[i] = pctx->cache[i - 1];
-        pctx->cache[0] = pix;
-    }
-
-    return pix;
-}
-
-static int decode_pixel(ArithCoder *acoder, PixContext *pctx,
-                        uint8_t *ngb, int num_ngb)
-{
-    int i, val, pix;
-
-    val = arith_get_model_sym(acoder, &pctx->cache_model);
-    if (val < pctx->num_syms) {
-        int idx, j;
-
-
-        idx = 0;
-        for (i = 0; i < pctx->cache_size; i++) {
-            for (j = 0; j < num_ngb; j++)
-                if (pctx->cache[i] == ngb[j])
-                    break;
-            if (j == num_ngb) {
-                if (idx == val)
-                    break;
-                idx++;
-            }
-        }
-        val = FFMIN(i, pctx->cache_size - 1);
-        pix = pctx->cache[val];
-    } else {
-        pix = arith_get_model_sym(acoder, &pctx->full_model);
-        for (i = 0; i < pctx->cache_size - 1; i++)
-            if (pctx->cache[i] == pix)
-                break;
-        val = i;
-    }
-    if (val) {
-        for (i = val; i > 0; i--)
-            pctx->cache[i] = pctx->cache[i - 1];
-        pctx->cache[0] = pix;
-    }
-
-    return pix;
-}
-
-static int decode_pixel_in_context(ArithCoder *acoder, PixContext *pctx,
-                                   uint8_t *src, int stride, int x, int y,
-                                   int has_right)
-{
-    uint8_t neighbours[4];
-    uint8_t ref_pix[4];
-    int nlen;
-    int layer = 0, sub;
-    int pix;
-    int i, j;
-
-    if (!y) {
-        memset(neighbours, src[-1], 4);
-    } else {
-        neighbours[TOP] = src[-stride];
-        if (!x) {
-            neighbours[TOP_LEFT] = neighbours[LEFT] = neighbours[TOP];
-        } else {
-            neighbours[TOP_LEFT] = src[-stride - 1];
-            neighbours[    LEFT] = src[-1];
-        }
-        if (has_right)
-            neighbours[TOP_RIGHT] = src[-stride + 1];
-        else
-            neighbours[TOP_RIGHT] = neighbours[TOP];
-    }
-
-    sub = 0;
-    if (x >= 2 && src[-2] == neighbours[LEFT])
-        sub  = 1;
-    if (y >= 2 && src[-2 * stride] == neighbours[TOP])
-        sub |= 2;
-
-    nlen = 1;
-    ref_pix[0] = neighbours[0];
-    for (i = 1; i < 4; i++) {
-        for (j = 0; j < nlen; j++)
-            if (ref_pix[j] == neighbours[i])
-                break;
-        if (j == nlen)
-            ref_pix[nlen++] = neighbours[i];
-    }
-
-    switch (nlen) {
-    case 1:
-    case 4:
-        layer = 0;
-        break;
-    case 2:
-        if (neighbours[TOP] == neighbours[TOP_LEFT]) {
-            if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT])
-                layer = 3;
-            else if (neighbours[LEFT] == neighbours[TOP_LEFT])
-                layer = 2;
-            else
-                layer = 4;
-        } else if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT]) {
-            if (neighbours[LEFT] == neighbours[TOP_LEFT])
-                layer = 1;
-            else
-                layer = 5;
-        } else if (neighbours[LEFT] == neighbours[TOP_LEFT]) {
-            layer = 6;
-        } else {
-            layer = 0;
-        }
-        break;
-    case 3:
-        if (neighbours[TOP] == neighbours[TOP_LEFT])
-            layer = 0;
-        else if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT])
-            layer = 1;
-        else if (neighbours[LEFT] == neighbours[TOP_LEFT])
-            layer = 2;
-        else if (neighbours[TOP_RIGHT] == neighbours[TOP])
-            layer = 3;
-        else if (neighbours[TOP] == neighbours[LEFT])
-            layer = 4;
-        else
-            layer = 5;
-        break;
-    }
-
-    pix = arith_get_model_sym(acoder, &pctx->sec_models[nlen - 1][layer][sub]);
-    if (pix < nlen)
-        return ref_pix[pix];
-    else
-        return decode_pixel(acoder, pctx, ref_pix, nlen);
-}
-
-static int decode_region(MSS1Context *ctx, ArithCoder *acoder, uint8_t *dst,
-                         int x, int y, int width, int height, int stride,
-                         PixContext *pctx)
-{
-    int i, j;
-
-    dst += x + y * stride;
-
-    dst[0] = decode_top_left_pixel(acoder, pctx);
-    for (j = 0; j < height; j++) {
-        for (i = 0; i < width; i++) {
-            if (!i && !j)
-                continue;
-
-            dst[i] = decode_pixel_in_context(acoder, pctx, dst + i, stride,
-                                             i, j, width - i - 1);
-        }
-        dst += stride;
-    }
-
-    return 0;
-}
-
-static int decode_region_masked(MSS1Context *ctx, ArithCoder *acoder,
-                                uint8_t *dst, int stride, uint8_t *mask,
-                                int mask_stride, int x, int y,
-                                int width, int height,
-                                PixContext *pctx)
-{
-    int i, j;
-
-    dst  += x + y * stride;
-    mask += x + y * mask_stride;
-
-    if (mask[0] == 0xFF)
-        dst[0] = decode_top_left_pixel(acoder, pctx);
-    for (j = 0; j < height; j++) {
-        for (i = 0; i < width; i++) {
-            if (!i && !j || mask[i] != 0xFF)
-                continue;
-
-            dst[i] = decode_pixel_in_context(acoder, pctx, dst + i, stride,
-                                             i, j, width - i - 1);
-        }
-        dst  += stride;
-        mask += mask_stride;
-    }
-
-    return 0;
-}
-
-static av_cold void codec_init(MSS1Context *ctx)
-{
-    model_init(&ctx->intra_region, 2, THRESH_ADAPTIVE);
-    model_init(&ctx->inter_region, 2, THRESH_ADAPTIVE);
-    model_init(&ctx->split_mode,   3, THRESH_HIGH);
-    model_init(&ctx->edge_mode,    2, THRESH_HIGH);
-    model_init(&ctx->pivot,        3, THRESH_LOW);
-    pixctx_init(&ctx->intra_pix_ctx, 8);
-    pixctx_init(&ctx->inter_pix_ctx, 2);
-    ctx->corrupted = 1;
-}
-
-static void codec_reset(MSS1Context *ctx)
-{
-    model_reset(&ctx->intra_region);
-    model_reset(&ctx->inter_region);
-    model_reset(&ctx->split_mode);
-    model_reset(&ctx->edge_mode);
-    model_reset(&ctx->pivot);
-    pixctx_reset(&ctx->intra_pix_ctx);
-    pixctx_reset(&ctx->inter_pix_ctx);
-
-    ctx->corrupted = 0;
+    c->get_model_sym = arith_get_model_sym;
+    c->get_number    = arith_get_number;
 }
 
 static int decode_pal(MSS1Context *ctx, ArithCoder *acoder)
 {
     int i, ncol, r, g, b;
-    uint32_t *pal = ctx->pal + 256 - ctx->free_colours;
+    uint32_t *pal = ctx->ctx.pal + 256 - ctx->ctx.free_colours;
 
-    if (!ctx->free_colours)
+    if (!ctx->ctx.free_colours)
         return 0;
 
-    ncol = arith_get_number(acoder, ctx->free_colours + 1);
+    ncol = arith_get_number(acoder, ctx->ctx.free_colours + 1);
     for (i = 0; i < ncol; i++) {
         r = arith_get_bits(acoder, 8);
         g = arith_get_bits(acoder, 8);
@@ -560,154 +162,6 @@ static int decode_pal(MSS1Context *ctx, ArithCoder *acoder)
     return !!ncol;
 }
 
-static int decode_pivot(MSS1Context *ctx, ArithCoder *acoder, int base)
-{
-    int val, inv;
-
-    inv = arith_get_model_sym(acoder, &ctx->edge_mode);
-    val = arith_get_model_sym(acoder, &ctx->pivot) + 1;
-
-    if (val > 2) {
-        if ((base + 1) / 2 - 2 <= 0) {
-            ctx->corrupted = 1;
-            return 0;
-        }
-        val = arith_get_number(acoder, (base + 1) / 2 - 2) + 3;
-    }
-
-    if ((unsigned)val >= base) {
-        ctx->corrupted = 1;
-        return 0;
-    }
-
-    return inv ? base - val : val;
-}
-
-static int decode_region_intra(MSS1Context *ctx, ArithCoder *acoder,
-                               int x, int y, int width, int height)
-{
-    int mode;
-
-    mode = arith_get_model_sym(acoder, &ctx->intra_region);
-
-    if (!mode) {
-        int i, pix;
-        int stride = ctx->pic_stride;
-        uint8_t *dst = ctx->pic_start + x + y * stride;
-
-        pix = decode_top_left_pixel(acoder, &ctx->intra_pix_ctx);
-        for (i = 0; i < height; i++, dst += stride)
-            memset(dst, pix, width);
-    } else {
-        return decode_region(ctx, acoder, ctx->pic_start,
-                             x, y, width, height, ctx->pic_stride,
-                             &ctx->intra_pix_ctx);
-    }
-
-    return 0;
-}
-
-static int decode_intra(MSS1Context *ctx, ArithCoder *acoder,
-                        int x, int y, int width, int height)
-{
-    int mode, pivot;
-
-    if (ctx->corrupted)
-        return -1;
-
-    mode = arith_get_model_sym(acoder, &ctx->split_mode);
-
-    switch (mode) {
-    case SPLIT_VERT:
-        pivot = decode_pivot(ctx, acoder, height);
-        if (ctx->corrupted)
-            return -1;
-        if (decode_intra(ctx, acoder, x, y, width, pivot))
-            return -1;
-        if (decode_intra(ctx, acoder, x, y + pivot, width, height - pivot))
-            return -1;
-        break;
-    case SPLIT_HOR:
-        pivot = decode_pivot(ctx, acoder, width);
-        if (ctx->corrupted)
-            return -1;
-        if (decode_intra(ctx, acoder, x, y, pivot, height))
-            return -1;
-        if (decode_intra(ctx, acoder, x + pivot, y, width - pivot, height))
-            return -1;
-        break;
-    case SPLIT_NONE:
-        return decode_region_intra(ctx, acoder, x, y, width, height);
-    default:
-        return -1;
-    }
-
-    return 0;
-}
-
-static int decode_region_inter(MSS1Context *ctx, ArithCoder *acoder,
-                               int x, int y, int width, int height)
-{
-    int mode;
-
-    mode = arith_get_model_sym(acoder, &ctx->inter_region);
-
-    if (!mode) {
-        mode = decode_top_left_pixel(acoder, &ctx->inter_pix_ctx);
-        if (mode != 0xFF) {
-            return 0;
-        } else {
-            return decode_region_intra(ctx, acoder, x, y, width, height);
-        }
-    } else {
-        if (decode_region(ctx, acoder, ctx->mask,
-                          x, y, width, height, ctx->mask_linesize,
-                          &ctx->inter_pix_ctx) < 0)
-            return -1;
-        return decode_region_masked(ctx, acoder, ctx->pic_start,
-                                    -ctx->pic.linesize[0], ctx->mask,
-                                    ctx->mask_linesize,
-                                    x, y, width, height,
-                                    &ctx->intra_pix_ctx);
-    }
-
-    return 0;
-}
-
-static int decode_inter(MSS1Context *ctx, ArithCoder *acoder,
-                        int x, int y, int width, int height)
-{
-    int mode, pivot;
-
-    if (ctx->corrupted)
-        return -1;
-
-    mode = arith_get_model_sym(acoder, &ctx->split_mode);
-
-    switch (mode) {
-    case SPLIT_VERT:
-        pivot = decode_pivot(ctx, acoder, height);
-        if (decode_inter(ctx, acoder, x, y, width, pivot))
-            return -1;
-        if (decode_inter(ctx, acoder, x, y + pivot, width, height - pivot))
-            return -1;
-        break;
-    case SPLIT_HOR:
-        pivot = decode_pivot(ctx, acoder, width);
-        if (decode_inter(ctx, acoder, x, y, pivot, height))
-            return -1;
-        if (decode_inter(ctx, acoder, x + pivot, y, width - pivot, height))
-            return -1;
-        break;
-    case SPLIT_NONE:
-        return decode_region_inter(ctx, acoder, x, y, width, height);
-    default:
-        return -1;
-    }
-
-    return 0;
-}
-
 static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                              AVPacket *avpkt)
 {
@@ -730,26 +184,25 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
         return ret;
     }
 
-    c->pic_start  = c->pic.data[0] + c->pic.linesize[0] * (avctx->height - 1);
-    c->pic_stride = -c->pic.linesize[0];
-    if (!arith_get_bit(&acoder)) {
-        codec_reset(c);
+    c->ctx.pic_start  = c->pic.data[0] + c->pic.linesize[0] * (avctx->height - 1);
+    c->ctx.pic_stride = -c->pic.linesize[0];
+    c->ctx.keyframe   = !arith_get_bit(&acoder);
+    if (c->ctx.keyframe) {
+        ff_mss12_codec_reset(&c->ctx);
         pal_changed      = decode_pal(c, &acoder);
-        c->corrupted     = decode_intra(c, &acoder, 0, 0,
-                                        avctx->width, avctx->height);
         c->pic.key_frame = 1;
         c->pic.pict_type = AV_PICTURE_TYPE_I;
     } else {
-        if (c->corrupted)
+        if (c->ctx.corrupted)
             return AVERROR_INVALIDDATA;
-        c->corrupted     = decode_inter(c, &acoder, 0, 0,
-                                        avctx->width, avctx->height);
         c->pic.key_frame = 0;
         c->pic.pict_type = AV_PICTURE_TYPE_P;
     }
-    if (c->corrupted)
+    c->ctx.corrupted = ff_mss12_decode_rect(&c->ctx, &acoder, 0, 0,
+                                            avctx->width, avctx->height);
+    if (c->ctx.corrupted)
         return AVERROR_INVALIDDATA;
-    memcpy(c->pic.data[1], c->pal, AVPALETTE_SIZE);
+    memcpy(c->pic.data[1], c->ctx.pal, AVPALETTE_SIZE);
     c->pic.palette_has_changed = pal_changed;
 
     *data_size = sizeof(AVFrame);
@@ -762,69 +215,11 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
 static av_cold int mss1_decode_init(AVCodecContext *avctx)
 {
     MSS1Context * const c = avctx->priv_data;
-    int i;
-
-    c->avctx = avctx;
-
-    if (avctx->extradata_size < 52 + 256 * 3) {
-        av_log(avctx, AV_LOG_ERROR, "Insufficient extradata size %d\n",
-               avctx->extradata_size);
-        return AVERROR_INVALIDDATA;
-    }
-
-    if (AV_RB32(avctx->extradata) < avctx->extradata_size) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Insufficient extradata size: expected %d got %d\n",
-               AV_RB32(avctx->extradata),
-               avctx->extradata_size);
-        return AVERROR_INVALIDDATA;
-    }
-
-    av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d\n",
-           AV_RB32(avctx->extradata + 4), AV_RB32(avctx->extradata + 8));
-    c->free_colours     = AV_RB32(avctx->extradata + 48);
-    if ((unsigned)c->free_colours > 256) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Incorrect number of changeable palette entries: %d\n",
-               c->free_colours);
-        return AVERROR_INVALIDDATA;
-    }
-    av_log(avctx, AV_LOG_DEBUG, "%d free colour(s)\n", c->free_colours);
-    avctx->coded_width  = AV_RB32(avctx->extradata + 20);
-    avctx->coded_height = AV_RB32(avctx->extradata + 24);
-
-    av_log(avctx, AV_LOG_DEBUG, "Display dimensions %dx%d\n",
-           AV_RB32(avctx->extradata + 12), AV_RB32(avctx->extradata + 16));
-    av_log(avctx, AV_LOG_DEBUG, "Coded dimensions %dx%d\n",
-           avctx->coded_width, avctx->coded_height);
-    av_log(avctx, AV_LOG_DEBUG, "%g frames per second\n",
-           av_int2float(AV_RB32(avctx->extradata + 28)));
-    av_log(avctx, AV_LOG_DEBUG, "Bitrate %d bps\n",
-           AV_RB32(avctx->extradata + 32));
-    av_log(avctx, AV_LOG_DEBUG, "Max. lead time %g ms\n",
-           av_int2float(AV_RB32(avctx->extradata + 36)));
-    av_log(avctx, AV_LOG_DEBUG, "Max. lag time %g ms\n",
-           av_int2float(AV_RB32(avctx->extradata + 40)));
-    av_log(avctx, AV_LOG_DEBUG, "Max. seek time %g ms\n",
-           av_int2float(AV_RB32(avctx->extradata + 44)));
-
-    for (i = 0; i < 256; i++)
-        c->pal[i] = 0xFF << 24 | AV_RB24(avctx->extradata + 52 + i * 3);
-
-    avctx->pix_fmt = PIX_FMT_PAL8;
-
-    c->mask_linesize = FFALIGN(avctx->width, 16);
-    c->mask          = av_malloc(c->mask_linesize * avctx->height);
-    if (!c->mask) {
-        av_log(avctx, AV_LOG_ERROR, "Cannot allocate mask plane\n");
-        return AVERROR(ENOMEM);
-    }
 
+    c->ctx.avctx       = avctx;
     avctx->coded_frame = &c->pic;
 
-    codec_init(c);
-
-    return 0;
+    return ff_mss12_decode_init(avctx, 0);
 }
 
 static av_cold int mss1_decode_end(AVCodecContext *avctx)
@@ -833,7 +228,7 @@ static av_cold int mss1_decode_end(AVCodecContext *avctx)
 
     if (c->pic.data[0])
         avctx->release_buffer(avctx, &c->pic);
-    av_freep(&c->mask);
+    ff_mss12_decode_end(avctx);
 
     return 0;
 }
diff --git a/libavcodec/mss12.c b/libavcodec/mss12.c
new file mode 100644
index 0000000000..38291d910c
--- /dev/null
+++ b/libavcodec/mss12.c
@@ -0,0 +1,581 @@
+/*
+ * Copyright (c) 2012 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Common functions for Microsoft Screen 1 and 2
+ */
+
+#include "libavutil/intfloat.h"
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+#include "mss12.h"
+
+enum SplitMode {
+    SPLIT_VERT = 0,
+    SPLIT_HOR,
+    SPLIT_NONE
+};
+
+static const int sec_order_sizes[4] = { 1, 7, 6, 1 };
+
+enum ContextDirection {
+    TOP_LEFT = 0,
+    TOP,
+    TOP_RIGHT,
+    LEFT
+};
+
+static int model_calc_threshold(Model *m)
+{
+    int thr;
+
+    if (m->thr_weight == -1) {
+        thr = 2 * m->weights[m->num_syms] - 1;
+        thr = ((thr >> 1) + 4 * m->cum_prob[0]) / thr;
+    } else {
+        thr = m->num_syms * m->thr_weight;
+    }
+
+    return FFMIN(thr, 0x3FFF);
+}
+
+static void model_reset(Model *m)
+{
+    int i;
+
+    for (i = 0; i <= m->num_syms; i++) {
+        m->weights[i]  = 1;
+        m->cum_prob[i] = m->num_syms - i;
+    }
+    m->weights[0]           = -1;
+    m->idx2sym[0]           = -1;
+    m->sym2idx[m->num_syms] = -1;
+    for (i = 0; i < m->num_syms; i++) {
+        m->sym2idx[i]     = i + 1;
+        m->idx2sym[i + 1] = i;
+    }
+}
+
+static av_cold void model_init(Model *m, int num_syms, int thr_weight)
+{
+    m->num_syms   = num_syms;
+    m->thr_weight = thr_weight;
+    m->threshold  = model_calc_threshold(m);
+    model_reset(m);
+}
+
+static void model_rescale_weights(Model *m)
+{
+    int i;
+    int cum_prob;
+
+    if (m->thr_weight == -1)
+        m->threshold = model_calc_threshold(m);
+    while (m->cum_prob[0] > m->threshold) {
+        cum_prob = 0;
+        for (i = m->num_syms; i >= 0; i--) {
+            m->cum_prob[i] = cum_prob;
+            m->weights[i]  = (m->weights[i] + 1) >> 1;
+            cum_prob      += m->weights[i];
+        }
+    }
+}
+
+void ff_mss12_model_update(Model *m, int val)
+{
+    int i;
+
+    if (m->weights[val] == m->weights[val - 1]) {
+        for (i = val; m->weights[i - 1] == m->weights[val]; i--);
+        if (i != val) {
+            int sym1, sym2;
+
+            sym1 = m->idx2sym[val];
+            sym2 = m->idx2sym[i];
+
+            m->idx2sym[val]  = sym2;
+            m->idx2sym[i]    = sym1;
+            m->sym2idx[sym1] = i;
+            m->sym2idx[sym2] = val;
+
+            val = i;
+        }
+    }
+    m->weights[val]++;
+    for (i = val - 1; i >= 0; i--)
+        m->cum_prob[i]++;
+    model_rescale_weights(m);
+}
+
+static void pixctx_reset(PixContext *ctx)
+{
+    int i, j, k;
+
+    for (i = 0; i < ctx->cache_size; i++)
+        ctx->cache[i] = i;
+
+    model_reset(&ctx->cache_model);
+    model_reset(&ctx->full_model);
+
+    for (i = 0; i < 4; i++)
+        for (j = 0; j < sec_order_sizes[i]; j++)
+            for (k = 0; k < 4; k++)
+                model_reset(&ctx->sec_models[i][j][k]);
+}
+
+static av_cold void pixctx_init(PixContext *ctx, int cache_size)
+{
+    int i, j, k;
+
+    ctx->cache_size = cache_size + 4;
+    ctx->num_syms   = cache_size;
+
+    for (i = 0; i < ctx->cache_size; i++)
+        ctx->cache[i] = i;
+
+    model_init(&ctx->cache_model, ctx->num_syms + 1, THRESH_LOW);
+    model_init(&ctx->full_model, 256, THRESH_HIGH);
+
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < sec_order_sizes[i]; j++) {
+            for (k = 0; k < 4; k++) {
+                model_init(&ctx->sec_models[i][j][k], 2 + i,
+                           i ? THRESH_LOW : THRESH_ADAPTIVE);
+            }
+        }
+    }
+}
+
+static int decode_top_left_pixel(ArithCoder *acoder, PixContext *pctx)
+{
+    int i, val, pix;
+
+    val = acoder->get_model_sym(acoder, &pctx->cache_model);
+    if (val < pctx->num_syms) {
+        pix = pctx->cache[val];
+    } else {
+        pix = acoder->get_model_sym(acoder, &pctx->full_model);
+        for (i = 0; i < pctx->cache_size - 1; i++)
+            if (pctx->cache[i] == pix)
+                break;
+        val = i;
+    }
+    if (val) {
+        for (i = val; i > 0; i--)
+            pctx->cache[i] = pctx->cache[i - 1];
+        pctx->cache[0] = pix;
+    }
+
+    return pix;
+}
+
+static int decode_pixel(ArithCoder *acoder, PixContext *pctx,
+                        uint8_t *ngb, int num_ngb)
+{
+    int i, val, pix;
+
+    val = acoder->get_model_sym(acoder, &pctx->cache_model);
+    if (val < pctx->num_syms) {
+        int idx, j;
+
+
+        idx = 0;
+        for (i = 0; i < pctx->cache_size; i++) {
+            for (j = 0; j < num_ngb; j++)
+                if (pctx->cache[i] == ngb[j])
+                    break;
+            if (j == num_ngb) {
+                if (idx == val)
+                    break;
+                idx++;
+            }
+        }
+        val = FFMIN(i, pctx->cache_size - 1);
+        pix = pctx->cache[val];
+    } else {
+        pix = acoder->get_model_sym(acoder, &pctx->full_model);
+        for (i = 0; i < pctx->cache_size - 1; i++)
+            if (pctx->cache[i] == pix)
+                break;
+        val = i;
+    }
+    if (val) {
+        for (i = val; i > 0; i--)
+            pctx->cache[i] = pctx->cache[i - 1];
+        pctx->cache[0] = pix;
+    }
+
+    return pix;
+}
+
+static int decode_pixel_in_context(ArithCoder *acoder, PixContext *pctx,
+                                   uint8_t *src, int stride, int x, int y,
+                                   int has_right)
+{
+    uint8_t neighbours[4];
+    uint8_t ref_pix[4];
+    int nlen;
+    int layer = 0, sub;
+    int pix;
+    int i, j;
+
+    if (!y) {
+        memset(neighbours, src[-1], 4);
+    } else {
+        neighbours[TOP] = src[-stride];
+        if (!x) {
+            neighbours[TOP_LEFT] = neighbours[LEFT] = neighbours[TOP];
+        } else {
+            neighbours[TOP_LEFT] = src[-stride - 1];
+            neighbours[    LEFT] = src[-1];
+        }
+        if (has_right)
+            neighbours[TOP_RIGHT] = src[-stride + 1];
+        else
+            neighbours[TOP_RIGHT] = neighbours[TOP];
+    }
+
+    sub = 0;
+    if (x >= 2 && src[-2] == neighbours[LEFT])
+        sub  = 1;
+    if (y >= 2 && src[-2 * stride] == neighbours[TOP])
+        sub |= 2;
+
+    nlen = 1;
+    ref_pix[0] = neighbours[0];
+    for (i = 1; i < 4; i++) {
+        for (j = 0; j < nlen; j++)
+            if (ref_pix[j] == neighbours[i])
+                break;
+        if (j == nlen)
+            ref_pix[nlen++] = neighbours[i];
+    }
+
+    switch (nlen) {
+    case 1:
+    case 4:
+        layer = 0;
+        break;
+    case 2:
+        if (neighbours[TOP] == neighbours[TOP_LEFT]) {
+            if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT])
+                layer = 3;
+            else if (neighbours[LEFT] == neighbours[TOP_LEFT])
+                layer = 2;
+            else
+                layer = 4;
+        } else if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT]) {
+            if (neighbours[LEFT] == neighbours[TOP_LEFT])
+                layer = 1;
+            else
+                layer = 5;
+        } else if (neighbours[LEFT] == neighbours[TOP_LEFT]) {
+            layer = 6;
+        } else {
+            layer = 0;
+        }
+        break;
+    case 3:
+        if (neighbours[TOP] == neighbours[TOP_LEFT])
+            layer = 0;
+        else if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT])
+            layer = 1;
+        else if (neighbours[LEFT] == neighbours[TOP_LEFT])
+            layer = 2;
+        else if (neighbours[TOP_RIGHT] == neighbours[TOP])
+            layer = 3;
+        else if (neighbours[TOP] == neighbours[LEFT])
+            layer = 4;
+        else
+            layer = 5;
+        break;
+    }
+
+    pix = acoder->get_model_sym(acoder, &pctx->sec_models[nlen - 1][layer][sub]);
+    if (pix < nlen)
+        return ref_pix[pix];
+    else
+        return decode_pixel(acoder, pctx, ref_pix, nlen);
+}
+
+static int decode_region(MSS12Context *ctx, ArithCoder *acoder, uint8_t *dst,
+                         int x, int y, int width, int height, int stride,
+                         PixContext *pctx)
+{
+    int i, j;
+
+    dst += x + y * stride;
+
+    dst[0] = decode_top_left_pixel(acoder, pctx);
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
+            if (!i && !j)
+                continue;
+
+            dst[i] = decode_pixel_in_context(acoder, pctx, dst + i, stride,
+                                             i, j, width - i - 1);
+        }
+        dst += stride;
+    }
+
+    return 0;
+}
+
+static int decode_region_masked(MSS12Context *ctx, ArithCoder *acoder,
+                                uint8_t *dst, int stride, uint8_t *mask,
+                                int mask_stride, int x, int y,
+                                int width, int height,
+                                PixContext *pctx)
+{
+    int i, j;
+
+    dst  += x + y * stride;
+    mask += x + y * mask_stride;
+
+    if (mask[0] == 0xFF)
+        dst[0] = decode_top_left_pixel(acoder, pctx);
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
+            if (!i && !j || mask[i] != 0xFF)
+                continue;
+
+            dst[i] = decode_pixel_in_context(acoder, pctx, dst + i, stride,
+                                             i, j, width - i - 1);
+        }
+        dst  += stride;
+        mask += mask_stride;
+    }
+
+    return 0;
+}
+
+static av_cold void codec_init(MSS12Context *ctx)
+{
+    model_init(&ctx->intra_region, 2, THRESH_ADAPTIVE);
+    model_init(&ctx->inter_region, 2, THRESH_ADAPTIVE);
+    model_init(&ctx->split_mode,   3, THRESH_HIGH);
+    model_init(&ctx->edge_mode,    2, THRESH_HIGH);
+    model_init(&ctx->pivot,        3, THRESH_LOW);
+    pixctx_init(&ctx->intra_pix_ctx, 8);
+    pixctx_init(&ctx->inter_pix_ctx, 2);
+    ctx->corrupted = 1;
+}
+
+void ff_mss12_codec_reset(MSS12Context *ctx)
+{
+    model_reset(&ctx->intra_region);
+    model_reset(&ctx->inter_region);
+    model_reset(&ctx->split_mode);
+    model_reset(&ctx->edge_mode);
+    model_reset(&ctx->pivot);
+    pixctx_reset(&ctx->intra_pix_ctx);
+    pixctx_reset(&ctx->inter_pix_ctx);
+
+    ctx->corrupted = 0;
+}
+
+static int decode_pivot(MSS12Context *ctx, ArithCoder *acoder, int base)
+{
+    int val, inv;
+
+    inv = acoder->get_model_sym(acoder, &ctx->edge_mode);
+    val = acoder->get_model_sym(acoder, &ctx->pivot) + 1;
+
+    if (val > 2) {
+        if ((base + 1) / 2 - 2 <= 0) {
+            ctx->corrupted = 1;
+            return 0;
+        }
+        val = acoder->get_number(acoder, (base + 1) / 2 - 2) + 3;
+    }
+
+    if ((unsigned)val >= base) {
+        ctx->corrupted = 1;
+        return 0;
+    }
+
+    return inv ? base - val : val;
+}
+
+static int decode_region_intra(MSS12Context *ctx, ArithCoder *acoder,
+                               int x, int y, int width, int height)
+{
+    int mode;
+
+    mode = acoder->get_model_sym(acoder, &ctx->intra_region);
+
+    if (!mode) {
+        int i, pix;
+        int stride = ctx->pic_stride;
+        uint8_t *dst = ctx->pic_start + x + y * stride;
+
+        pix = decode_top_left_pixel(acoder, &ctx->intra_pix_ctx);
+        for (i = 0; i < height; i++, dst += stride)
+            memset(dst, pix, width);
+    } else {
+        return decode_region(ctx, acoder, ctx->pic_start,
+                             x, y, width, height, ctx->pic_stride,
+                             &ctx->intra_pix_ctx);
+    }
+
+    return 0;
+}
+
+static int decode_region_inter(MSS12Context *ctx, ArithCoder *acoder,
+                               int x, int y, int width, int height)
+{
+    int mode;
+
+    mode = acoder->get_model_sym(acoder, &ctx->inter_region);
+
+    if (!mode) {
+        mode = decode_top_left_pixel(acoder, &ctx->inter_pix_ctx);
+        if (mode != 0xFF) {
+            return 0;
+        } else {
+            return decode_region_intra(ctx, acoder, x, y, width, height);
+        }
+    } else {
+        if (decode_region(ctx, acoder, ctx->mask,
+                          x, y, width, height, ctx->mask_linesize,
+                          &ctx->inter_pix_ctx) < 0)
+            return -1;
+        return decode_region_masked(ctx, acoder, ctx->pic_start,
+                                    ctx->pic_stride, ctx->mask,
+                                    ctx->mask_linesize,
+                                    x, y, width, height,
+                                    &ctx->intra_pix_ctx);
+    }
+
+    return 0;
+}
+
+int ff_mss12_decode_rect(MSS12Context *ctx, ArithCoder *acoder,
+                         int x, int y, int width, int height)
+{
+    int mode, pivot;
+
+    if (ctx->corrupted)
+        return -1;
+
+    mode = acoder->get_model_sym(acoder, &ctx->split_mode);
+
+    switch (mode) {
+    case SPLIT_VERT:
+        pivot = decode_pivot(ctx, acoder, height);
+        if (ff_mss12_decode_rect(ctx, acoder, x, y, width, pivot))
+            return -1;
+        if (ff_mss12_decode_rect(ctx, acoder, x, y + pivot, width, height - pivot))
+            return -1;
+        break;
+    case SPLIT_HOR:
+        pivot = decode_pivot(ctx, acoder, width);
+        if (ff_mss12_decode_rect(ctx, acoder, x, y, pivot, height))
+            return -1;
+        if (ff_mss12_decode_rect(ctx, acoder, x + pivot, y, width - pivot, height))
+            return -1;
+        break;
+    case SPLIT_NONE:
+        if (ctx->keyframe)
+            return decode_region_intra(ctx, acoder, x, y, width, height);
+        else
+            return decode_region_inter(ctx, acoder, x, y, width, height);
+    default:
+        return -1;
+    }
+
+    return 0;
+}
+
+av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version)
+{
+    MSS12Context * const c = avctx->priv_data;
+    int i;
+
+    c->avctx = avctx;
+
+    if (avctx->extradata_size < 52 + 256 * 3) {
+        av_log(avctx, AV_LOG_ERROR, "Insufficient extradata size %d\n",
+               avctx->extradata_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (AV_RB32(avctx->extradata) < avctx->extradata_size) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Insufficient extradata size: expected %d got %d\n",
+               AV_RB32(avctx->extradata),
+               avctx->extradata_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d\n",
+           AV_RB32(avctx->extradata + 4), AV_RB32(avctx->extradata + 8));
+    c->free_colours     = AV_RB32(avctx->extradata + 48);
+    if ((unsigned)c->free_colours > 256) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Incorrect number of changeable palette entries: %d\n",
+               c->free_colours);
+        return AVERROR_INVALIDDATA;
+    }
+    av_log(avctx, AV_LOG_DEBUG, "%d free colour(s)\n", c->free_colours);
+    avctx->coded_width  = AV_RB32(avctx->extradata + 20);
+    avctx->coded_height = AV_RB32(avctx->extradata + 24);
+
+    av_log(avctx, AV_LOG_DEBUG, "Display dimensions %dx%d\n",
+           AV_RB32(avctx->extradata + 12), AV_RB32(avctx->extradata + 16));
+    av_log(avctx, AV_LOG_DEBUG, "Coded dimensions %dx%d\n",
+           avctx->coded_width, avctx->coded_height);
+    av_log(avctx, AV_LOG_DEBUG, "%g frames per second\n",
+           av_int2float(AV_RB32(avctx->extradata + 28)));
+    av_log(avctx, AV_LOG_DEBUG, "Bitrate %d bps\n",
+           AV_RB32(avctx->extradata + 32));
+    av_log(avctx, AV_LOG_DEBUG, "Max. lead time %g ms\n",
+           av_int2float(AV_RB32(avctx->extradata + 36)));
+    av_log(avctx, AV_LOG_DEBUG, "Max. lag time %g ms\n",
+           av_int2float(AV_RB32(avctx->extradata + 40)));
+    av_log(avctx, AV_LOG_DEBUG, "Max. seek time %g ms\n",
+           av_int2float(AV_RB32(avctx->extradata + 44)));
+
+    for (i = 0; i < 256; i++)
+        c->pal[i] = 0xFF << 24 | AV_RB24(avctx->extradata + 52 + i * 3);
+
+    avctx->pix_fmt = PIX_FMT_PAL8;
+
+    c->mask_linesize = FFALIGN(avctx->width, 16);
+    c->mask          = av_malloc(c->mask_linesize * avctx->height);
+    if (!c->mask) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot allocate mask plane\n");
+        return AVERROR(ENOMEM);
+    }
+
+    codec_init(c);
+
+    return 0;
+}
+
+av_cold int ff_mss12_decode_end(AVCodecContext *avctx)
+{
+    MSS12Context * const c = avctx->priv_data;
+
+    av_freep(&c->mask);
+
+    return 0;
+}
diff --git a/libavcodec/mss12.h b/libavcodec/mss12.h
new file mode 100644
index 0000000000..c1c316044d
--- /dev/null
+++ b/libavcodec/mss12.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2012 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Common header for Microsoft Screen 1 and 2
+ */
+
+#ifndef AVCODEC_MSS12_H
+#define AVCODEC_MSS12_H
+
+#include "avcodec.h"
+#include "get_bits.h"
+
+#define MODEL_MIN_SYMS    2
+#define MODEL_MAX_SYMS  256
+#define THRESH_ADAPTIVE  -1
+#define THRESH_LOW       15
+#define THRESH_HIGH      50
+
+typedef struct Model {
+    int cum_prob[MODEL_MAX_SYMS + 1];
+    int weights[MODEL_MAX_SYMS + 1];
+    int idx2sym[MODEL_MAX_SYMS + 1];
+    int sym2idx[MODEL_MAX_SYMS + 1];
+    int num_syms;
+    int thr_weight, threshold;
+} Model;
+
+typedef struct ArithCoder {
+    int low, high, value;
+    GetBitContext *gb;
+    int (*get_model_sym)(struct ArithCoder *c, Model *m);
+    int (*get_number)   (struct ArithCoder *c, int n);
+} ArithCoder;
+
+typedef struct PixContext {
+    int cache_size, num_syms;
+    uint8_t cache[12];
+    Model cache_model, full_model;
+    Model sec_models[4][8][4];
+} PixContext;
+
+typedef struct MSS12Context {
+    AVCodecContext *avctx;
+    uint8_t        *pic_start;
+    int            pic_stride;
+    uint8_t        *mask;
+    int            mask_linesize;
+    uint32_t       pal[256];
+    int            free_colours;
+    int            keyframe;
+    Model          intra_region, inter_region;
+    Model          pivot, edge_mode, split_mode;
+    PixContext     intra_pix_ctx, inter_pix_ctx;
+    int            corrupted;
+} MSS12Context;
+
+int ff_mss12_decode_rect(MSS12Context *ctx, ArithCoder *acoder,
+                         int x, int y, int width, int height);
+void ff_mss12_model_update(Model *m, int val);
+void ff_mss12_codec_reset(MSS12Context *ctx);
+av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version);
+av_cold int ff_mss12_decode_end(AVCodecContext *avctx);
+
+#endif /* AVCODEC_MSS12_H */
diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c
index e96f2f2239..7f27081d26 100644
--- a/libavcodec/qcelpdec.c
+++ b/libavcodec/qcelpdec.c
@@ -32,10 +32,8 @@
 #include "avcodec.h"
 #include "internal.h"
 #include "get_bits.h"
-
+#include "dsputil.h"
 #include "qcelpdata.h"
-
-#include "celp_math.h"
 #include "celp_filters.h"
 #include "acelp_filters.h"
 #include "acelp_vectors.h"
@@ -401,8 +399,9 @@ static void apply_gain_ctrl(float *v_out, const float *v_ref, const float *v_in)
 
     for (i = 0; i < 160; i += 40)
         ff_scale_vector_to_given_sum_of_squares(v_out + i, v_in + i,
-                                                ff_dot_productf(v_ref + i,
-                                                                v_ref + i, 40),
+                                                ff_scalarproduct_float_c(v_ref + i,
+                                                                         v_ref + i,
+                                                                         40),
                                                 40);
 }
 
@@ -678,8 +677,8 @@ static void postfilter(QCELPContext *q, float *samples, float *lpc)
     ff_tilt_compensation(&q->postfilter_tilt_mem, 0.3, pole_out + 10, 160);
 
     ff_adaptive_gain_control(samples, pole_out + 10,
-                             ff_dot_productf(q->formant_mem + 10,
-                                             q->formant_mem + 10, 160),
+                             ff_scalarproduct_float_c(q->formant_mem + 10,
+                                                      q->formant_mem + 10, 160),
                              160, 0.9375, &q->postfilter_agc_mem);
 }
 
diff --git a/libavcodec/ra288.c b/libavcodec/ra288.c
index d75a5b4550..a8e9eabb3b 100644
--- a/libavcodec/ra288.c
+++ b/libavcodec/ra288.c
@@ -25,7 +25,6 @@
 #include "get_bits.h"
 #include "ra288.h"
 #include "lpc.h"
-#include "celp_math.h"
 #include "celp_filters.h"
 
 #define MAX_BACKWARD_FILTER_ORDER  36
@@ -74,7 +73,7 @@ static av_cold int ra288_decode_init(AVCodecContext *avctx)
 static void convolve(float *tgt, const float *src, int len, int n)
 {
     for (; n >= 0; n--)
-        tgt[n] = ff_dot_productf(src, src - n, len);
+        tgt[n] = ff_scalarproduct_float_c(src, src - n, len);
 
 }
 
@@ -103,7 +102,7 @@ static void decode(RA288Context *ractx, float gain, int cb_coef)
     for (i=0; i < 5; i++)
         buffer[i] = codetable[cb_coef][i] * sumsum;
 
-    sum = ff_dot_productf(buffer, buffer, 5);
+    sum = ff_scalarproduct_float_c(buffer, buffer, 5);
 
     sum = FFMAX(sum, 5. / (1<<24));
 
diff --git a/libavcodec/sipr.c b/libavcodec/sipr.c
index b3951b46f3..c95f5fe2a5 100644
--- a/libavcodec/sipr.c
+++ b/libavcodec/sipr.c
@@ -32,7 +32,6 @@
 #include "dsputil.h"
 
 #include "lsp.h"
-#include "celp_math.h"
 #include "acelp_vectors.h"
 #include "acelp_pitch_delay.h"
 #include "acelp_filters.h"
@@ -411,7 +410,7 @@ static void decode_frame(SiprContext *ctx, SiprParameters *params,
                               SUBFR_SIZE);
 
         avg_energy =
-            (0.01 + ff_dot_productf(fixed_vector, fixed_vector, SUBFR_SIZE))/
+            (0.01 + ff_scalarproduct_float_c(fixed_vector, fixed_vector, SUBFR_SIZE)) /
                 SUBFR_SIZE;
 
         ctx->past_pitch_gain = pitch_gain = gain_cb[params->gc_index[i]][0];
@@ -453,9 +452,9 @@ static void decode_frame(SiprContext *ctx, SiprParameters *params,
 
     if (ctx->mode == MODE_5k0) {
         for (i = 0; i < subframe_count; i++) {
-            float energy = ff_dot_productf(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i*SUBFR_SIZE,
-                                           ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i*SUBFR_SIZE,
-                                           SUBFR_SIZE);
+            float energy = ff_scalarproduct_float_c(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE,
+                                                    ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE,
+                                                    SUBFR_SIZE);
             ff_adaptive_gain_control(&synth[i * SUBFR_SIZE],
                                      &synth[i * SUBFR_SIZE], energy,
                                      SUBFR_SIZE, 0.9, &ctx->postfilter_agc);
diff --git a/libavcodec/sipr16k.c b/libavcodec/sipr16k.c
index 96079d93b5..c2e090bb0a 100644
--- a/libavcodec/sipr16k.c
+++ b/libavcodec/sipr16k.c
@@ -26,8 +26,9 @@
 #include "sipr.h"
 #include "libavutil/common.h"
 #include "libavutil/mathematics.h"
+#include "dsputil.h"
 #include "lsp.h"
-#include "celp_math.h"
+#include "celp_filters.h"
 #include "acelp_vectors.h"
 #include "acelp_pitch_delay.h"
 #include "acelp_filters.h"
@@ -163,10 +164,10 @@ static float acelp_decode_gain_codef(float gain_corr_factor, const float *fc_v,
                                      int subframe_size, int ma_pred_order)
 {
     mr_energy +=
-        ff_dot_productf(quant_energy, ma_prediction_coeff, ma_pred_order);
+        ff_scalarproduct_float_c(quant_energy, ma_prediction_coeff, ma_pred_order);
 
     mr_energy = gain_corr_factor * exp(M_LN10 / 20. * mr_energy) /
-        sqrt((0.01 + ff_dot_productf(fc_v, fc_v, subframe_size)));
+        sqrt((0.01 + ff_scalarproduct_float_c(fc_v, fc_v, subframe_size)));
     return mr_energy;
 }
 
diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
index 5a8a84c4cf..a5269f43a0 100644
--- a/libavcodec/vc1.h
+++ b/libavcodec/vc1.h
@@ -452,4 +452,9 @@ int ff_vc1_parse_frame_header    (VC1Context *v, GetBitContext *gb);
 int ff_vc1_parse_frame_header_adv(VC1Context *v, GetBitContext *gb);
 int ff_vc1_init_common(VC1Context *v);
 
+av_cold int  ff_vc1_decode_init_alloc_tables(VC1Context *v);
+av_cold void ff_vc1_init_transposed_scantables(VC1Context *v);
+av_cold int  ff_vc1_decode_end(AVCodecContext *avctx);
+void ff_vc1_decode_blocks(VC1Context *v);
+
 #endif /* AVCODEC_VC1_H */
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 2fc4913729..0362c1a04f 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -4734,7 +4734,7 @@ static void vc1_decode_skip_blocks(VC1Context *v)
     s->pict_type = AV_PICTURE_TYPE_P;
 }
 
-static void vc1_decode_blocks(VC1Context *v)
+void ff_vc1_decode_blocks(VC1Context *v)
 {
 
     v->s.esc3_level_length = 0;
@@ -5048,7 +5048,7 @@ static void vc1_sprite_flush(AVCodecContext *avctx)
 
 #endif
 
-static av_cold int vc1_decode_init_alloc_tables(VC1Context *v)
+av_cold int ff_vc1_decode_init_alloc_tables(VC1Context *v)
 {
     MpegEncContext *s = &v->s;
     int i;
@@ -5114,6 +5114,21 @@ static av_cold int vc1_decode_init_alloc_tables(VC1Context *v)
     return 0;
 }
 
+av_cold void ff_vc1_init_transposed_scantables(VC1Context *v)
+{
+    int i;
+    for (i = 0; i < 64; i++) {
+#define transpose(x) ((x >> 3) | ((x & 7) << 3))
+        v->zz_8x8[0][i] = transpose(ff_wmv1_scantable[0][i]);
+        v->zz_8x8[1][i] = transpose(ff_wmv1_scantable[1][i]);
+        v->zz_8x8[2][i] = transpose(ff_wmv1_scantable[2][i]);
+        v->zz_8x8[3][i] = transpose(ff_wmv1_scantable[3][i]);
+        v->zzi_8x8[i]   = transpose(ff_vc1_adv_interlaced_8x8_zz[i]);
+    }
+    v->left_blk_sh = 0;
+    v->top_blk_sh  = 3;
+}
+
 /** Initialize a VC1/WMV3 decoder
  * @todo TODO: Handle VC-1 IDUs (Transport level?)
  * @todo TODO: Decypher remaining bits in extra_data
@@ -5123,7 +5138,6 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
     VC1Context *v = avctx->priv_data;
     MpegEncContext *s = &v->s;
     GetBitContext gb;
-    int i;
 
     /* save the container output size for WMImage */
     v->output_width  = avctx->width;
@@ -5226,16 +5240,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
     s->mb_height = (avctx->coded_height + 15) >> 4;
 
     if (v->profile == PROFILE_ADVANCED || v->res_fasttx) {
-        for (i = 0; i < 64; i++) {
-#define transpose(x) ((x >> 3) | ((x & 7) << 3))
-            v->zz_8x8[0][i] = transpose(ff_wmv1_scantable[0][i]);
-            v->zz_8x8[1][i] = transpose(ff_wmv1_scantable[1][i]);
-            v->zz_8x8[2][i] = transpose(ff_wmv1_scantable[2][i]);
-            v->zz_8x8[3][i] = transpose(ff_wmv1_scantable[3][i]);
-            v->zzi_8x8[i] = transpose(ff_vc1_adv_interlaced_8x8_zz[i]);
-        }
-        v->left_blk_sh = 0;
-        v->top_blk_sh  = 3;
+        ff_vc1_init_transposed_scantables(v);
     } else {
         memcpy(v->zz_8x8, ff_wmv1_scantable, 4*64);
         v->left_blk_sh = 3;
@@ -5261,7 +5266,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
 /** Close a VC1/WMV3 decoder
  * @warning Initial try at using MpegEncContext stuff
  */
-static av_cold int vc1_decode_end(AVCodecContext *avctx)
+av_cold int ff_vc1_decode_end(AVCodecContext *avctx)
 {
     VC1Context *v = avctx->priv_data;
     int i;
@@ -5453,11 +5458,11 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
     if (s->context_initialized &&
         (s->width  != avctx->coded_width ||
          s->height != avctx->coded_height)) {
-        vc1_decode_end(avctx);
+        ff_vc1_decode_end(avctx);
     }
 
     if (!s->context_initialized) {
-        if (ff_msmpeg4_decode_init(avctx) < 0 || vc1_decode_init_alloc_tables(v) < 0)
+        if (ff_msmpeg4_decode_init(avctx) < 0 || ff_vc1_decode_init_alloc_tables(v) < 0)
             return -1;
 
         s->low_delay = !avctx->has_b_frames || v->res_sprite;
@@ -5612,7 +5617,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                 av_log(v->s.avctx, AV_LOG_ERROR, "end mb y %d %d invalid\n", s->end_mb_y, s->start_mb_y);
                 continue;
             }
-            vc1_decode_blocks(v);
+            ff_vc1_decode_blocks(v);
             if (i != n_slices)
                 s->gb = slices[i].gb;
         }
@@ -5693,7 +5698,7 @@ AVCodec ff_vc1_decoder = {
     .id             = AV_CODEC_ID_VC1,
     .priv_data_size = sizeof(VC1Context),
     .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
     .long_name      = NULL_IF_CONFIG_SMALL("SMPTE VC-1"),
@@ -5708,7 +5713,7 @@ AVCodec ff_wmv3_decoder = {
     .id             = AV_CODEC_ID_WMV3,
     .priv_data_size = sizeof(VC1Context),
     .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 9"),
@@ -5724,7 +5729,7 @@ AVCodec ff_wmv3_vdpau_decoder = {
     .id             = AV_CODEC_ID_WMV3,
     .priv_data_size = sizeof(VC1Context),
     .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 9 VDPAU"),
@@ -5740,7 +5745,7 @@ AVCodec ff_vc1_vdpau_decoder = {
     .id             = AV_CODEC_ID_VC1,
     .priv_data_size = sizeof(VC1Context),
     .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
     .long_name      = NULL_IF_CONFIG_SMALL("SMPTE VC-1 VDPAU"),
@@ -5756,7 +5761,7 @@ AVCodec ff_wmv3image_decoder = {
     .id             = AV_CODEC_ID_WMV3IMAGE,
     .priv_data_size = sizeof(VC1Context),
     .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
     .flush          = vc1_sprite_flush,
@@ -5772,7 +5777,7 @@ AVCodec ff_vc1image_decoder = {
     .id             = AV_CODEC_ID_VC1IMAGE,
     .priv_data_size = sizeof(VC1Context),
     .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
     .flush          = vc1_sprite_flush,
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index c61b223793..0a9961c882 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -28,11 +28,12 @@
 #define UNCHECKED_BITSTREAM_READER 1
 
 #include <math.h>
+
+#include "dsputil.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "put_bits.h"
 #include "wmavoice_data.h"
-#include "celp_math.h"
 #include "celp_filters.h"
 #include "acelp_vectors.h"
 #include "acelp_filters.h"
@@ -518,7 +519,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch,
 
     /* find best fitting point in history */
     do {
-        dot = ff_dot_productf(in, ptr, size);
+        dot = ff_scalarproduct_float_c(in, ptr, size);
         if (dot > optimal_gain) {
             optimal_gain  = dot;
             best_hist_ptr = ptr;
@@ -527,7 +528,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch,
 
     if (optimal_gain <= 0)
         return -1;
-    dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
+    dot = ff_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
     if (dot <= 0) // would be 1.0
         return -1;
 
@@ -557,8 +558,8 @@ static float tilt_factor(const float *lpcs, int n_lpcs)
 {
     float rh0, rh1;
 
-    rh0 = 1.0     + ff_dot_productf(lpcs,  lpcs,    n_lpcs);
-    rh1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);
+    rh0 = 1.0     + ff_scalarproduct_float_c(lpcs,  lpcs,    n_lpcs);
+    rh1 = lpcs[0] + ff_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);
 
     return rh1 / rh0;
 }
@@ -651,7 +652,7 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs,
                              -1.8 * tilt_factor(coeffs, remainder - 1),
                              coeffs, remainder);
     }
-    sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
+    sq = (1.0 / 64.0) * sqrtf(1 / ff_scalarproduct_float_c(coeffs, coeffs, remainder));
     for (n = 0; n < remainder; n++)
         coeffs[n] *= sq;
 }
@@ -1315,7 +1316,7 @@ static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
     /* Calculate gain for adaptive & fixed codebook signal.
      * see ff_amr_set_fixed_gain(). */
     idx = get_bits(gb, 7);
-    fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
+    fcb_gain = expf(ff_scalarproduct_float_c(s->gain_pred_err, gain_coeff, 6) -
                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
     acb_gain = wmavoice_gain_codebook_acb[idx];
     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
diff --git a/library.mak b/library.mak
index a53c635ada..9c01e34c73 100644
--- a/library.mak
+++ b/library.mak
@@ -35,14 +35,14 @@ install-libs-$(CONFIG_SHARED): install-lib$(NAME)-shared
 
 define RULES
 $(EXAMPLES) $(TESTPROGS) $(TOOLS): %$(EXESUF): %.o
-	$$(LD) $(LDFLAGS) -o $$@ $$^ -l$(FULLNAME) $(FFEXTRALIBS) $$(ELIBS)
+	$$(LD) $(LDFLAGS) $$(LD_O) $$^ -l$(FULLNAME) $(FFEXTRALIBS) $$(ELIBS)
 
 $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)
 	$(Q)cd ./$(SUBDIR) && $(LN_S) $(SLIBNAME_WITH_MAJOR) $(SLIBNAME)
 
 $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SUBDIR)lib$(NAME).ver
 	$(SLIB_CREATE_DEF_CMD)
-	$$(LD) $(SHFLAGS) $(LDFLAGS) -o $$@ $$(filter %.o,$$^) $(FFEXTRALIBS) $(EXTRAOBJS)
+	$$(LD) $(SHFLAGS) $(LDFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS) $(EXTRAOBJS)
 	$(SLIB_EXTRA_CMD)
 
 ifdef SUBDIR