vpx_codec_dec_init: check that the iface is a decoder

Make sure the given interface is actually a decoder interface before initializing it.

Change-Id: Ie48d737f2956cc2f0891666de5ea87251e96bc49
Remove unused vp8_get4x4sse_cs_mmx declaration
2011-03-24 15:05:10 +02:00 · 2011-03-24 15:05:10 +02:00 · 2011-03-24 15:05:10 +02:00 · 2011-03-24 15:05:10 +02:00 · 2011-03-24 15:05:09 +02:00 · 2011-03-24 15:05:09 +02:00
44 changed files with 695 additions and 813 deletions
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -20,7 +20,7 @@

 extern  void vp8_init_scan_order_mask();

-void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
+static void update_mode_info_border(MODE_INFO *mi, int rows, int cols)
 {
    int i;
    vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));
@@ -119,7 +119,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
        return 1;
    }

-    vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
+    update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);

    return 0;
 }
--- a/vp8/common/filter.c
+++ b/vp8/common/filter.c
@@ -38,7 +38,7 @@ DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
    { 0, -1,   12,  123,  -6,  0 },
 };

-void vp8_filter_block2d_first_pass
+static void filter_block2d_first_pass
 (
    unsigned char *src_ptr,
    int *output_ptr,
@@ -82,7 +82,7 @@ void vp8_filter_block2d_first_pass
    }
 }

-void vp8_filter_block2d_second_pass
+static void filter_block2d_second_pass
 (
    int *src_ptr,
    unsigned char *output_ptr,
@@ -129,7 +129,7 @@ void vp8_filter_block2d_second_pass
 }


-void vp8_filter_block2d
+static void filter_block2d
 (
    unsigned char  *src_ptr,
    unsigned char  *output_ptr,
@@ -142,39 +142,13 @@ void vp8_filter_block2d
    int FData[9*4]; /* Temp data buffer used in filtering */

    /* First filter 1-D horizontally... */
-    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
+    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);

    /* then filter verticaly... */
-    vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
+    filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
 }


-void vp8_block_variation_c
-(
-    unsigned char  *src_ptr,
-    int   src_pixels_per_line,
-    int *HVar,
-    int *VVar
-)
-{
-    int i, j;
-    unsigned char *Ptr = src_ptr;
-
-    for (i = 0; i < 4; i++)
-    {
-        for (j = 0; j < 4; j++)
-        {
-            *HVar += abs((int)Ptr[j] - (int)Ptr[j+1]);
-            *VVar += abs((int)Ptr[j] - (int)Ptr[j+src_pixels_per_line]);
-        }
-
-        Ptr += src_pixels_per_line;
-    }
-}
-
-
-
-
 void vp8_sixtap_predict_c
 (
    unsigned char  *src_ptr,
@@ -191,7 +165,7 @@ void vp8_sixtap_predict_c
    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */

-    vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
+    filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
 }
 void vp8_sixtap_predict8x8_c
 (
@@ -211,11 +185,11 @@ void vp8_sixtap_predict8x8_c
    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */

    /* First filter 1-D horizontally... */
-    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
+    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);


    /* then filter verticaly... */
-    vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
+    filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);

 }

@@ -237,11 +211,11 @@ void vp8_sixtap_predict8x4_c
    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */

    /* First filter 1-D horizontally... */
-    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
+    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);


    /* then filter verticaly... */
-    vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
+    filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);

 }

@@ -264,10 +238,10 @@ void vp8_sixtap_predict16x16_c
    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */

    /* First filter 1-D horizontally... */
-    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
+    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);

    /* then filter verticaly... */
-    vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
+    filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);

 }

@@ -294,7 +268,7 @@ void vp8_sixtap_predict16x16_c
 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
 *
 ****************************************************************************/
-void vp8_filter_block2d_bil_first_pass
+static void filter_block2d_bil_first_pass
 (
    unsigned char  *src_ptr,
    unsigned short *dst_ptr,
@@ -345,7 +319,7 @@ void vp8_filter_block2d_bil_first_pass
 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
 *
 ****************************************************************************/
-void vp8_filter_block2d_bil_second_pass
+static void filter_block2d_bil_second_pass
 (
    unsigned short *src_ptr,
    unsigned char  *dst_ptr,
@@ -399,7 +373,7 @@ void vp8_filter_block2d_bil_second_pass
 *  SPECIAL NOTES : The largest block size can be handled here is 16x16
 *
 ****************************************************************************/
-void vp8_filter_block2d_bil
+static void filter_block2d_bil
 (
    unsigned char *src_ptr,
    unsigned char *dst_ptr,
@@ -415,10 +389,10 @@ void vp8_filter_block2d_bil
    unsigned short FData[17*16];    /* Temp data buffer used in filtering */

    /* First filter 1-D horizontally... */
-    vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
+    filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);

    /* then 1-D vertically... */
-    vp8_filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
+    filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
 }


@@ -444,19 +418,19 @@ void vp8_bilinear_predict4x4_c
        unsigned char temp2[16];

        bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
-        vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
+        filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);

        for (i = 0; i < 16; i++)
        {
            if (temp1[i] != temp2[i])
            {
                bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
-                vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
+                filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
            }
        }
    }
 #endif
-    vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
+    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);

 }

@@ -476,7 +450,7 @@ void vp8_bilinear_predict8x8_c
    HFilter = vp8_bilinear_filters[xoffset];
    VFilter = vp8_bilinear_filters[yoffset];

-    vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
+    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);

 }

@@ -496,7 +470,7 @@ void vp8_bilinear_predict8x4_c
    HFilter = vp8_bilinear_filters[xoffset];
    VFilter = vp8_bilinear_filters[yoffset];

-    vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
+    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);

 }

@@ -516,5 +490,5 @@ void vp8_bilinear_predict16x16_c
    HFilter = vp8_bilinear_filters[xoffset];
    VFilter = vp8_bilinear_filters[yoffset];

-    vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
+    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
 }
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -11,6 +11,13 @@

 #include "findnearmv.h"

+const unsigned char vp8_mbsplit_offset[4][16] = {
+    { 0,  8,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  2,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  2,  8, 10,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}
+};
+
 /* Predict motion vectors using those from already-decoded nearby blocks.
   Note that we only consider one 4x4 subblock from each candidate 16x16
   macroblock.   */
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -70,4 +70,6 @@ const B_MODE_INFO *vp8_left_bmi(const MODE_INFO *cur_mb, int b);

 const B_MODE_INFO *vp8_above_bmi(const MODE_INFO *cur_mb, int b, int mi_stride);

+extern const unsigned char vp8_mbsplit_offset[4][16];
+
 #endif
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -17,7 +17,7 @@ typedef enum
    DEST = 1
 } BLOCKSET;

-void vp8_setup_block
+static void setup_block
 (
    BLOCKD *b,
    int mv_stride,
@@ -43,7 +43,8 @@ void vp8_setup_block

 }

-void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
+
+static void setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
 {
    int block;

@@ -64,16 +65,16 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)

    for (block = 0; block < 16; block++) /* y blocks */
    {
-        vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
+        setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
                        (block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
    }

    for (block = 16; block < 20; block++) /* U and V blocks */
    {
-        vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
+        setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
                        ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);

-        vp8_setup_block(&x->block[block+4], x->dst.uv_stride, v, x->dst.uv_stride,
+        setup_block(&x->block[block+4], x->dst.uv_stride, v, x->dst.uv_stride,
                        ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
    }
 }
@@ -124,6 +125,6 @@ void vp8_build_block_doffsets(MACROBLOCKD *x)
 {

    /* handle the destination pitch features */
-    vp8_setup_macroblock(x, DEST);
-    vp8_setup_macroblock(x, PRED);
+    setup_macroblock(x, DEST);
+    setup_macroblock(x, PRED);
 }
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -211,7 +211,7 @@ void vp8_post_proc_down_and_across_c
    }
 }

-int vp8_q2mbl(int x)
+static int q2mbl(int x)
 {
    if (x < 20) x = 20;

@@ -314,8 +314,8 @@ static void vp8_deblock_and_de_macro_block(YV12_BUFFER_CONFIG         *source,
    (void) flag;

    POSTPROC_INVOKE(rtcd, downacross)(source->y_buffer, post->y_buffer, source->y_stride,  post->y_stride, source->y_height, source->y_width,  ppl);
-    POSTPROC_INVOKE(rtcd, across)(post->y_buffer, post->y_stride, post->y_height, post->y_width, vp8_q2mbl(q));
-    POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, vp8_q2mbl(q));
+    POSTPROC_INVOKE(rtcd, across)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
+    POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));

    POSTPROC_INVOKE(rtcd, downacross)(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
    POSTPROC_INVOKE(rtcd, downacross)(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -168,7 +168,7 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
    }
 }

-void vp8_build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
+static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
 {
    unsigned char *ptr_base;
    unsigned char *ptr;
@@ -187,7 +187,7 @@ void vp8_build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
    }
 }

-void vp8_build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
+static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
 {
    unsigned char *ptr_base;
    unsigned char *ptr;
@@ -246,7 +246,7 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
            BLOCKD *d1 = &x->block[i+1];

            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
-                vp8_build_inter_predictors2b(x, d0, 8);
+                build_inter_predictors2b(x, d0, 8);
            else
            {
                vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
@@ -291,7 +291,7 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
            for (i = 0; i < 4; i++)
            {
                BLOCKD *d = &x->block[bbb[i]];
-                vp8_build_inter_predictors4b(x, d, 16);
+                build_inter_predictors4b(x, d, 16);
            }

        }
@@ -303,7 +303,7 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
                BLOCKD *d1 = &x->block[i+1];

                if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
-                    vp8_build_inter_predictors2b(x, d0, 16);
+                    build_inter_predictors2b(x, d0, 16);
                else
                {
                    vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
@@ -372,7 +372,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
            for (i = 0; i < 4; i++)
            {
                BLOCKD *d = &x->block[bbb[i]];
-                vp8_build_inter_predictors4b(x, d, 16);
+                build_inter_predictors4b(x, d, 16);
            }
        }
        else
@@ -383,7 +383,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
                BLOCKD *d1 = &x->block[i+1];

                if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
-                    vp8_build_inter_predictors2b(x, d0, 16);
+                    build_inter_predictors2b(x, d0, 16);
                else
                {
                    vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
@@ -400,7 +400,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
            BLOCKD *d1 = &x->block[i+1];

            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
-                vp8_build_inter_predictors2b(x, d0, 8);
+                build_inter_predictors2b(x, d0, 8);
            else
            {
                vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
@@ -600,7 +600,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
            for (i = 0; i < 4; i++)
            {
                BLOCKD *d = &x->block[bbb[i]];
-                /*vp8_build_inter_predictors4b(x, d, 16);*/
+                /*build_inter_predictors4b(x, d, 16);*/

                {
                    unsigned char *ptr_base;
@@ -630,7 +630,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)

                if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                {
-                    /*vp8_build_inter_predictors2b(x, d0, 16);*/
+                    /*build_inter_predictors2b(x, d0, 16);*/
                    unsigned char *ptr_base;
                    unsigned char *ptr;
                    unsigned char *pred_ptr = d0->predictor;
@@ -662,7 +662,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)

            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
            {
-                /*vp8_build_inter_predictors2b(x, d0, 8);*/
+                /*build_inter_predictors2b(x, d0, 8);*/
                unsigned char *ptr_base;
                unsigned char *ptr;
                unsigned char *pred_ptr = d0->predictor;
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -113,97 +113,6 @@ nextrow:
    ret


-;
-; THIS FUNCTION APPEARS TO BE UNUSED
-;
-;void vp8_filter_block1d_v6_mmx
-;(
-;   short *src_ptr,
-;   unsigned char *output_ptr,
-;   unsigned int pixels_per_line,
-;   unsigned int pixel_step,
-;   unsigned int output_height,
-;   unsigned int output_width,
-;   short * vp8_filter
-;)
-global sym(vp8_filter_block1d_v6_mmx)
-sym(vp8_filter_block1d_v6_mmx):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 7
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-        movq      mm5, [GLOBAL(rd)]
-        push        rbx
-        mov         rbx, arg(6) ;vp8_filter
-        movq      mm1, [rbx + 16]             ; do both the negative taps first!!!
-        movq      mm2, [rbx + 32]         ;
-        movq      mm6, [rbx + 48]        ;
-        movq      mm7, [rbx + 64]        ;
-
-        movsxd      rdx, dword ptr arg(2) ;pixels_per_line
-        mov         rdi, arg(1) ;output_ptr
-        mov         rsi, arg(0) ;src_ptr
-        sub         rsi, rdx
-        sub         rsi, rdx
-        movsxd      rcx, DWORD PTR arg(4) ;output_height
-        movsxd      rax, DWORD PTR arg(5) ;output_width      ; destination pitch?
-        pxor        mm0, mm0              ; mm0 = 00000000
-
-
-nextrow_v:
-        movq        mm3, [rsi+rdx]        ; mm3 = p0..p8  = row -1
-        pmullw      mm3, mm1              ; mm3 *= kernel 1 modifiers.
-
-
-        movq        mm4, [rsi + 4*rdx]      ; mm4 = p0..p3  = row 2
-        pmullw      mm4, mm7              ; mm4 *= kernel 4 modifiers.
-        paddsw      mm3, mm4              ; mm3 += mm4
-
-        movq        mm4, [rsi + 2*rdx]           ; mm4 = p0..p3  = row 0
-        pmullw      mm4, mm2              ; mm4 *= kernel 2 modifiers.
-        paddsw      mm3, mm4              ; mm3 += mm4
-
-        movq        mm4, [rsi]            ; mm4 = p0..p3  = row -2
-        pmullw      mm4, [rbx]            ; mm4 *= kernel 0 modifiers.
-        paddsw      mm3, mm4              ; mm3 += mm4
-
-
-        add         rsi, rdx              ; move source forward 1 line to avoid 3 * pitch
-        movq        mm4, [rsi + 2*rdx]     ; mm4 = p0..p3  = row 1
-        pmullw      mm4, mm6              ; mm4 *= kernel 3 modifiers.
-        paddsw      mm3, mm4              ; mm3 += mm4
-
-        movq        mm4, [rsi + 4*rdx]    ; mm4 = p0..p3  = row 3
-        pmullw      mm4, [rbx +80]        ; mm4 *= kernel 3 modifiers.
-        paddsw      mm3, mm4              ; mm3 += mm4
-
-
-        paddsw      mm3, mm5               ; mm3 += round value
-        psraw       mm3, VP8_FILTER_SHIFT     ; mm3 /= 128
-        packuswb    mm3, mm0              ; pack and saturate
-
-        movd        [rdi],mm3             ; store the results in the destination
-
-        add         rdi,rax;
-
-        dec         rcx                   ; decrement count
-        jnz         nextrow_v             ; next row
-
-        pop         rbx
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
 ;void vp8_filter_block1dc_v6_mmx
 ;(
 ;   short *src_ptr,
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -228,15 +228,8 @@ unsigned int vp8_mv_cont_count[5][4] =
 };
 #endif

-unsigned char vp8_mbsplit_offset[4][16] = {
-    { 0,  8,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
-    { 0,  2,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
-    { 0,  2,  8, 10,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
-    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}
-};
-
-unsigned char vp8_mbsplit_fill_count[4] = {8, 8, 4, 1};
-unsigned char vp8_mbsplit_fill_offset[4][16] = {
+static const unsigned char mbsplit_fill_count[4] = {8, 8, 4, 1};
+static const unsigned char mbsplit_fill_offset[4][16] = {
    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
    { 0,  1,  4,  5,  8,  9, 12, 13,  2,  3,   6,  7, 10, 11, 14, 15},
    { 0,  1,  4,  5,  2,  3,  6,  7,  8,  9,  12, 13, 10, 11, 14, 15},
@@ -246,7 +239,7 @@ unsigned char vp8_mbsplit_fill_offset[4][16] = {



-void vp8_mb_mode_mv_init(VP8D_COMP *pbi)
+static void mb_mode_mv_init(VP8D_COMP *pbi)
 {
    vp8_reader *const bc = & pbi->bc;
    MV_CONTEXT *const mvc = pbi->common.fc.mvc;
@@ -287,7 +280,7 @@ void vp8_mb_mode_mv_init(VP8D_COMP *pbi)
    }
 }

-void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
+static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
                            int mb_row, int mb_col)
 {
    const MV Zero = { 0, 0};
@@ -405,10 +398,10 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
                    /* Fill (uniform) modes, mvs of jth subset.
                     Must do it here because ensuing subsets can
                     refer back to us via "left" or "above". */
-                    unsigned char *fill_offset;
-                    unsigned int fill_count = vp8_mbsplit_fill_count[s];
+                    const unsigned char *fill_offset;
+                    unsigned int fill_count = mbsplit_fill_count[s];

-                    fill_offset = &vp8_mbsplit_fill_offset[s][(unsigned char)j * vp8_mbsplit_fill_count[s]];
+                    fill_offset = &mbsplit_fill_offset[s][(unsigned char)j * mbsplit_fill_count[s]];

                    do {
                        mi->bmi[ *fill_offset] = bmi;
@@ -525,7 +518,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
    MODE_INFO *mi = pbi->common.mi;
    int mb_row = -1;

-    vp8_mb_mode_mv_init(pbi);
+    mb_mode_mv_init(pbi);

    while (++mb_row < pbi->common.mb_rows)
    {
@@ -543,11 +536,11 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)

        while (++mb_col < pbi->common.mb_cols)
        {
-            /*vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
+            /*read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
            if(pbi->common.frame_type == KEY_FRAME)
                vp8_kfread_modes(pbi, mi, mb_row, mb_col);
            else
-                vp8_read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
+                read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);

            mi++;       /* next macroblock */
        }
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -175,7 +175,7 @@ void clamp_mvs(MACROBLOCKD *xd)

 }

-void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
+static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
    int eobtotal = 0;
    int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
@@ -320,10 +320,8 @@ FILE *vpxlog = 0;



-void vp8_decode_mb_row(VP8D_COMP *pbi,
-                       VP8_COMMON *pc,
-                       int mb_row,
-                       MACROBLOCKD *xd)
+static void
+decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd)
 {

    int i;
@@ -395,7 +393,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
        else
        pbi->debugoutput =0;
        */
-        vp8_decode_macroblock(pbi, xd);
+        decode_macroblock(pbi, xd);

        /* check if the boolean decoder has suffered an error */
        xd->corrupted |= vp8dx_bool_error(xd->current_bc);
@@ -901,7 +899,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
                    ibc = 0;
            }

-            vp8_decode_mb_row(pbi, pc, mb_row, xd);
+            decode_mb_row(pbi, pc, mb_row, xd);
        }
    }

--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -19,7 +19,13 @@
 #define BOOL_DATA UINT8

 #define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
+DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) =
+{
+    0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X,
+    6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X,
+    6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
+    6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X
+};
 #define EOB_CONTEXT_NODE            0
 #define ZERO_CONTEXT_NODE           1
 #define ONE_CONTEXT_NODE            2
@@ -135,7 +141,7 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
            Prob = coef_probs; \
            if(c<15) {\
            ++c; \
-            Prob += vp8_coef_bands_x[c]; \
+            Prob += coef_bands_x[c]; \
            goto branch; \
            } goto BLOCK_FINISHED; /*for malformed input */\
        } \
@@ -244,7 +250,7 @@ BLOCK_LOOP:
    Prob += v * ENTROPY_NODES;

 DO_WHILE:
-    Prob += vp8_coef_bands_x[c];
+    Prob += coef_bands_x[c];
    DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);

 CHECK_0_:
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -37,43 +37,6 @@
 extern void vp8_init_loop_filter(VP8_COMMON *cm);
 extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);

-#if CONFIG_DEBUG
-void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
-{
-    FILE *yuv_file = fopen((char *)name, "ab");
-    unsigned char *src = s->y_buffer;
-    int h = s->y_height;
-
-    do
-    {
-        fwrite(src, s->y_width, 1,  yuv_file);
-        src += s->y_stride;
-    }
-    while (--h);
-
-    src = s->u_buffer;
-    h = s->uv_height;
-
-    do
-    {
-        fwrite(src, s->uv_width, 1,  yuv_file);
-        src += s->uv_stride;
-    }
-    while (--h);
-
-    src = s->v_buffer;
-    h = s->uv_height;
-
-    do
-    {
-        fwrite(src, s->uv_width, 1, yuv_file);
-        src += s->uv_stride;
-    }
-    while (--h);
-
-    fclose(yuv_file);
-}
-#endif

 void vp8dx_initialize()
 {
@@ -155,35 +118,6 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr)
 }


-void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) comp;
-
-    (void) pbi;
-    (void) x;
-
-    switch (oxst)
-    {
-    case VP8D_OK:
-        break;
-    }
-}
-
-int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) comp;
-
-    (void) pbi;
-
-    switch (oxst)
-    {
-    case VP8D_OK:
-        break;
-    }
-
-    return -1;
-}
-
 int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
 {
    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
@@ -203,6 +137,8 @@ int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C

    return 0;
 }
+
+
 int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
 {
    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
@@ -459,12 +395,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
        vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
    }

-#if 0
-    /* DEBUG code */
-    /*vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);*/
-    if (cm->current_video_frame <= 5)
-        write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
-#endif

    vp8_clear_system_state();

--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -113,6 +113,7 @@ typedef struct VP8Decompressor
    pthread_t           *h_decoding_thread;
    sem_t               *h_event_start_decoding;
    sem_t                h_event_end_decoding;
+    sem_t				*h_mb_counter;
    /* end of threading data */
 #endif

--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -33,7 +33,7 @@ extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
 #define RTCD_VTABLE(x) NULL
 #endif

-void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
+static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
 {
    VP8_COMMON *const pc = & pbi->common;
    int i, j;
@@ -87,7 +87,7 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
 }


-void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
+static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
 {
    int eobtotal = 0;
    int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
@@ -214,7 +214,7 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
 }


-THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
+static THREAD_FUNCTION thread_decoding_proc(void *p_data)
 {
    int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
@@ -275,6 +275,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)

                    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                    {
+/*
                        if ((mb_col & (nsync-1)) == 0)
                        {
                            while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
@@ -283,6 +284,8 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                                thread_sleep(0);
                            }
                        }
+*/
+                        sem_wait(&pbi->h_mb_counter[ithread]);

                        if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
                        {
@@ -318,7 +321,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

                        vp8_build_uvmvs(xd, pc->full_pixel);
-                        vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
+                        decode_macroblock(pbi, xd, mb_row, mb_col);

                        if (pbi->common.filter_level)
                        {
@@ -383,6 +386,9 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)

                        /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
                        pbi->mt_current_mb_col[mb_row] = mb_col;
+
+                        if (mb_row != pbi->common.mb_rows-1)
+                            sem_post(&pbi->h_mb_counter[ithread+1]);
                    }

                    /* adjust to the next row of mbs */
@@ -438,6 +444,7 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)

        CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
+        CHECK_MEM_ERROR(pbi->h_mb_counter, vpx_malloc(sizeof(sem_t) * (pbi->decoding_thread_count + 1)));
        CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
        vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
        CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
@@ -450,9 +457,12 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
            pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];

-            pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
+            pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

+        for (ithread = 0; ithread < pbi->decoding_thread_count + 1; ithread++)
+            sem_init(&pbi->h_mb_counter[ithread], 0, 0);
+
        sem_init(&pbi->h_event_end_decoding, 0, 0);

        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
@@ -615,6 +625,9 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
            sem_destroy(&pbi->h_event_start_decoding[i]);
        }

+        for (i = 0; i < pbi->decoding_thread_count + 1; i++)
+            sem_destroy(&pbi->h_mb_counter[i]);
+
        sem_destroy(&pbi->h_event_end_decoding);

            vpx_free(pbi->h_decoding_thread);
@@ -623,6 +636,11 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
            vpx_free(pbi->h_event_start_decoding);
            pbi->h_event_start_decoding = NULL;

+        if (pbi->h_mb_counter)
+        {
+            vpx_free(pbi->h_mb_counter);
+            pbi->h_mb_counter = NULL;
+        }
            vpx_free(pbi->mb_row_di);
            pbi->mb_row_di = NULL ;

@@ -632,7 +650,7 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
 }


-void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
+static void lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
 {
    VP8_COMMON *cm  = &pbi->common;
    MACROBLOCKD *mbd = &pbi->mb;
@@ -715,14 +733,17 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
            vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
            vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
        }
-        vp8mt_lpf_init(pbi, pc->filter_level);
+        lpf_init(pbi, pc->filter_level);
    }

-    vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
+    setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
+
+

    for (i = 0; i < pbi->decoding_thread_count; i++)
        sem_post(&pbi->h_event_start_decoding[i]);

+
    for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {

@@ -755,6 +776,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)

            for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
            {
+/*
                if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){
                    while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                    {
@@ -762,6 +784,9 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                        thread_sleep(0);
                    }
                }
+*/
+                if(mb_row > 0)
+                    sem_wait(&pbi->h_mb_counter[pbi->decoding_thread_count]);

                if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
                {
@@ -803,7 +828,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                }

                vp8_build_uvmvs(xd, pc->full_pixel);
-                vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
+                decode_macroblock(pbi, xd, mb_row, mb_col);

                /* check if the boolean decoder has suffered an error */
                xd->corrupted |= vp8dx_bool_error(xd->current_bc);
@@ -870,6 +895,10 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                xd->above_context++;

                pbi->mt_current_mb_col[mb_row] = mb_col;
+
+                /* macroblock counter */
+                if (mb_row != pbi->common.mb_rows-1)
+                    sem_post(&pbi->h_mb_counter[0]);
            }

            /* adjust to the next row of mbs */
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -17,7 +17,7 @@
 #if HAVE_MMX
 void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);

-void vp8_dequantize_b_mmx(BLOCKD *d)
+static void dequantize_b_mmx(BLOCKD *d)
 {
    short *sq = (short *) d->qcoeff;
    short *dq = (short *) d->dqcoeff;
@@ -41,7 +41,7 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
 #if HAVE_MMX
    if (flags & HAS_MMX)
    {
-        pbi->dequant.block               = vp8_dequantize_b_mmx;
+        pbi->dequant.block               = dequantize_b_mmx;
        pbi->dequant.idct_add            = vp8_dequant_idct_add_mmx;
        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_mmx;
        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_mmx;
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -59,9 +59,9 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;*/

        /*cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;*/
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;*/
+        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_armv6;
+        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_armv6;
        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;

        /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
--- a/vp8/encoder/arm/armv6/vp8_fast_fdct4x4_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_fast_fdct4x4_armv6.asm
@@ -0,0 +1,262 @@
+;
+;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+    EXPORT |vp8_fast_fdct4x4_armv6|
+
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA    |.text|, CODE, READONLY
+; void vp8_fast_fdct4x4_armv6(short *input, short *output, int pitch)
+|vp8_fast_fdct4x4_armv6| PROC
+
+    stmfd       sp!, {r4 - r12, lr}
+
+    ; PART 1: transform each input row (4 coefficients); rows are r2 (pitch) bytes apart
+
+    ; coeffs 0-3
+    ldrd        r4, r5, [r0]        ; [i1 | i0] [i3 | i2]
+
+    ldr         r10, c7500
+    ldr         r11, c14500
+    ldr         r12, c0x22a453a0    ; [2217*4 | 5352*4]
+    ldr         lr, c0x00080008     ; [8 | 8]
+    ror         r5, r5, #16         ; [i2 | i3]
+
+    qadd16      r6, r4, r5          ; [i1+i2 | i0+i3] = [b1 | a1] without shift
+    qsub16      r7, r4, r5          ; [i1-i2 | i0-i3] = [c1 | d1] without shift
+
+    add         r0, r0, r2          ; update input pointer
+
+    qadd16      r7, r7, r7          ; 2*[c1|d1] --> we can use smlad and smlsd
+                                    ; with 2217*4 and 5352*4 without losing the
+                                    ; sign bit (overflow)
+
+    smuad       r4, r6, lr          ; o0 = (i1+i2)*8 + (i0+i3)*8
+    smusd       r5, r6, lr          ; o2 = (i1+i2)*8 - (i0+i3)*8
+
+    smlad       r6, r7, r12, r11    ; o1 = (c1 * 2217 + d1 * 5352 +  14500)
+    smlsdx      r7, r7, r12, r10    ; o3 = (d1 * 2217 - c1 * 5352 +   7500)
+
+    ldrd        r8, r9, [r0]        ; [i5 | i4] [i7 | i6]
+
+    pkhbt       r3, r4, r6, lsl #4  ; [o1 | o0], keep in register for PART 2
+    pkhbt       r6, r5, r7, lsl #4  ; [o3 | o2]
+
+    str         r6, [r1, #4]        ; park [o3 | o2] in output, reloaded in PART 2
+
+    ; coeffs 4-7
+    ror         r9, r9, #16         ; [i6 | i7]
+
+    qadd16      r6, r8, r9          ; [i5+i6 | i4+i7] = [b1 | a1] without shift
+    qsub16      r7, r8, r9          ; [i5-i6 | i4-i7] = [c1 | d1] without shift
+
+    add         r0, r0, r2          ; update input pointer
+
+    qadd16      r7, r7, r7          ; 2x[c1|d1] --> we can use smlad and smlsd
+                                    ; with 2217*4 and 5352*4 without losing the
+                                    ; sign bit (overflow)
+
+    smuad       r9, r6, lr          ; o4 = (i5+i6)*8 + (i4+i7)*8
+    smusd       r8, r6, lr          ; o6 = (i5+i6)*8 - (i4+i7)*8
+
+    smlad       r6, r7, r12, r11    ; o5 = (c1 * 2217 + d1 * 5352 +  14500)
+    smlsdx      r7, r7, r12, r10    ; o7 = (d1 * 2217 - c1 * 5352 +   7500)
+
+    ldrd        r4, r5, [r0]        ; [i9 | i8] [i11 | i10]
+
+    pkhbt       r9, r9, r6, lsl #4  ; [o5 | o4], keep in register for PART 2
+    pkhbt       r6, r8, r7, lsl #4  ; [o7 | o6]
+
+    str         r6, [r1, #12]       ; park [o7 | o6] in output, reloaded in PART 2
+
+    ; coeffs 8-11
+    ror         r5, r5, #16         ; [i10 | i11]
+
+    qadd16      r6, r4, r5          ; [i9+i10 | i8+i11]=[b1 | a1] without shift
+    qsub16      r7, r4, r5          ; [i9-i10 | i8-i11]=[c1 | d1] without shift
+
+    add         r0, r0, r2          ; update input pointer
+
+    qadd16      r7, r7, r7          ; 2x[c1|d1] --> we can use smlad and smlsd
+                                    ; with 2217*4 and 5352*4 without losing the
+                                    ; sign bit (overflow)
+
+    smuad       r2, r6, lr          ; o8 = (i9+i10)*8 + (i8+i11)*8
+    smusd       r8, r6, lr          ; o10 = (i9+i10)*8 - (i8+i11)*8
+
+    smlad       r6, r7, r12, r11    ; o9 = (c1 * 2217 + d1 * 5352 +  14500)
+    smlsdx      r7, r7, r12, r10    ; o11 = (d1 * 2217 - c1 * 5352 +   7500)
+
+    ldrd        r4, r5, [r0]        ; [i13 | i12] [i15 | i14]
+
+    pkhbt       r2, r2, r6, lsl #4  ; [o9 | o8], keep in register for PART 2
+    pkhbt       r6, r8, r7, lsl #4  ; [o11 | o10]
+
+    str         r6, [r1, #20]       ; park [o11 | o10] in output, reloaded in PART 2
+
+    ; coeffs 12-15
+    ror         r5, r5, #16         ; [i14 | i15]
+
+    qadd16      r6, r4, r5          ; [i13+i14 | i12+i15]=[b1|a1] without shift
+    qsub16      r7, r4, r5          ; [i13-i14 | i12-i15]=[c1|d1] without shift
+
+    qadd16      r7, r7, r7          ; 2x[c1|d1] --> we can use smlad and smlsd
+                                    ; with 2217*4 and 5352*4 without losing the
+                                    ; sign bit (overflow)
+
+    smuad       r4, r6, lr          ; o12 = (i13+i14)*8 + (i12+i15)*8
+    smusd       r5, r6, lr          ; o14 = (i13+i14)*8 - (i12+i15)*8
+
+    smlad       r6, r7, r12, r11    ; o13 = (c1 * 2217 + d1 * 5352 +  14500)
+    smlsdx      r7, r7, r12, r10    ; o15 = (d1 * 2217 - c1 * 5352 +   7500)
+
+    pkhbt       r0, r4, r6, lsl #4  ; [o13 | o12], keep in register for PART 2
+    pkhbt       r6, r5, r7, lsl #4  ; [o15 | o14]
+
+    str         r6, [r1, #28]       ; park [o15 | o14] in output, reloaded in PART 2
+
+
+    ; PART 2 -------------------------------------------------
+    ldr         r11, c12000
+    ldr         r10, c51000
+    ldr         lr, c0x00070007     ; [7 | 7]
+
+    qadd16      r4, r3, r0          ; a1 = [i1+i13 | i0+i12]
+    qadd16      r5, r9, r2          ; b1 = [i5+i9  |  i4+i8]
+    qsub16      r6, r9, r2          ; c1 = [i5-i9  |  i4-i8]
+    qsub16      r7, r3, r0          ; d1 = [i1-i13 | i0-i12]
+
+    qadd16      r4, r4, lr          ; a1 + 7
+
+    add         r0, r11, #0x10000   ; 12000 + (1 << 16), added when d1 != 0
+
+    qadd16      r2, r4, r5          ; a1 + b1 + 7
+    qsub16      r3, r4, r5          ; a1 - b1 + 7
+
+    ldr         r12, c0x08a914e8    ; [2217 | 5352]
+
+    lsl         r8, r2, #16         ; prepare bottom halfword for scaling
+    asr         r2, r2, #4          ; scale top halfword
+    lsl         r9, r3, #16         ; prepare bottom halfword for scaling
+    asr         r3, r3, #4          ; scale top halfword
+    pkhtb       r4, r2, r8, asr #20 ; pack and scale bottom halfword
+    pkhtb       r5, r3, r9, asr #20 ; pack and scale bottom halfword
+
+    smulbt      r2, r6, r12         ; [ ------ | c1*2217]
+    str         r4, [r1, #0]        ; [     o1 |      o0]
+    smultt      r3, r6, r12         ; [c1*2217 | ------ ]
+    str         r5, [r1, #16]       ; [     o9 |      o8]
+
+    smlabb      r8, r7, r12, r2     ; [ ------ | d1*5352]
+    smlatb      r9, r7, r12, r3     ; [d1*5352 | ------ ]
+
+    smulbb      r2, r6, r12         ; [ ------ | c1*5352]
+    smultb      r3, r6, r12         ; [c1*5352 | ------ ]
+
+    lsls        r6, r7, #16         ; bottom d1 != 0 ?
+    addeq       r8, r8, r11         ; c1_b*2217+d1_b*5352+12000 + (d==0)
+    addne       r8, r8, r0          ; c1_b*2217+d1_b*5352+12000 + (d!=0)
+    asrs        r6, r7, #16         ; top d1 != 0 ?
+    addeq       r9, r9, r11         ; c1_t*2217+d1_t*5352+12000 + (d==0)
+    addne       r9, r9, r0          ; c1_t*2217+d1_t*5352+12000 + (d!=0)
+
+    smlabt      r4, r7, r12, r10    ; [ ------ | d1*2217] + 51000
+    smlatt      r5, r7, r12, r10    ; [d1*2217 | ------ ] + 51000
+
+    pkhtb       r9, r9, r8, asr #16 ; [o5 | o4]
+
+    sub         r4, r4, r2
+    sub         r5, r5, r3
+
+    ldr         r3, [r1, #4]        ; [i3 | i2]
+
+    pkhtb       r5, r5, r4, asr #16 ; [o13|o12]
+
+    str         r9, [r1, #8]        ; [o5 | o4]
+
+    ldr         r9, [r1, #12]       ; [i7 | i6]
+    ldr         r8, [r1, #28]       ; [i15|i14]
+    ldr         r2, [r1, #20]       ; [i11|i10]
+    str         r5, [r1, #24]       ; [o13|o12]
+
+    qadd16      r4, r3, r8          ; a1 = [i3+i15 | i2+i14]
+    qadd16      r5, r9, r2          ; b1 = [i7+i11 | i6+i10]
+
+    qadd16      r4, r4, lr          ; a1 + 7
+
+    qsub16      r6, r9, r2          ; c1 = [i7-i11 | i6-i10]
+    qadd16      r2, r4, r5          ; a1 + b1 + 7
+    qsub16      r7, r3, r8          ; d1 = [i3-i15 | i2-i14]
+    qsub16      r3, r4, r5          ; a1 - b1 + 7
+
+    lsl         r8, r2, #16         ; prepare bottom halfword for scaling
+    asr         r2, r2, #4          ; scale top halfword
+    lsl         r9, r3, #16         ; prepare bottom halfword for scaling
+    asr         r3, r3, #4          ; scale top halfword
+    pkhtb       r4, r2, r8, asr #20 ; pack and scale bottom halfword
+    pkhtb       r5, r3, r9, asr #20 ; pack and scale bottom halfword
+
+    smulbt      r2, r6, r12         ; [ ------ | c1*2217]
+    str         r4, [r1, #4]        ; [     o3 |      o2]
+    smultt      r3, r6, r12         ; [c1*2217 | ------ ]
+    str         r5, [r1, #20]       ; [    o11 |     o10]
+
+    smlabb      r8, r7, r12, r2     ; [ ------ | d1*5352]
+    smlatb      r9, r7, r12, r3     ; [d1*5352 | ------ ]
+
+    smulbb      r2, r6, r12         ; [ ------ | c1*5352]
+    smultb      r3, r6, r12         ; [c1*5352 | ------ ]
+
+    lsls        r6, r7, #16         ; bottom d1 != 0 ?
+    addeq       r8, r8, r11         ; c1_b*2217+d1_b*5352+12000 + (d==0)
+    addne       r8, r8, r0          ; c1_b*2217+d1_b*5352+12000 + (d!=0)
+
+    asrs        r6, r7, #16         ; top d1 != 0 ?
+    addeq       r9, r9, r11         ; c1_t*2217+d1_t*5352+12000 + (d==0)
+    addne       r9, r9, r0          ; c1_t*2217+d1_t*5352+12000 + (d!=0)
+
+    smlabt      r4, r7, r12, r10    ; [ ------ | d1*2217] + 51000
+    smlatt      r5, r7, r12, r10    ; [d1*2217 | ------ ] + 51000
+
+    pkhtb       r9, r9, r8, asr #16 ; [o7 | o6]
+
+    sub         r4, r4, r2
+    sub         r5, r5, r3
+
+    str         r9, [r1, #12]       ; [o7 | o6]
+    pkhtb       r5, r5, r4, asr #16 ; [o15|o14]
+
+    str         r5, [r1, #28]       ; [o15|o14]
+
+    ldmfd       sp!, {r4 - r12, pc}
+
+    ENDP
+
+; Used constants
+c7500
+    DCD     7500
+c14500
+    DCD     14500
+c0x22a453a0
+    DCD     0x22a453a0
+c0x00080008
+    DCD     0x00080008
+c12000
+    DCD     12000
+c51000
+    DCD     51000
+c0x00070007
+    DCD     0x00070007
+c0x08a914e8
+    DCD     0x08a914e8
+
+    END
--- a/vp8/encoder/arm/dct_arm.c
+++ b/vp8/encoder/arm/dct_arm.c
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_config.h"
+#include "vp8/encoder/dct.h"
+
+#if HAVE_ARMV6
+
+void vp8_fast_fdct8x4_armv6(short *input, short *output, int pitch)
+{
+    vp8_fast_fdct4x4_armv6(input,   output,    pitch); /* first 4x4 block */
+    vp8_fast_fdct4x4_armv6(input + 4, output + 16, pitch); /* second 4x4 block: input advanced 4 shorts, output 16 shorts */
+}
+
+#endif /* HAVE_ARMV6 */
+
+
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -14,12 +14,21 @@

 #if HAVE_ARMV6
 extern prototype_fdct(vp8_short_walsh4x4_armv6);
+extern prototype_fdct(vp8_fast_fdct4x4_armv6);
+extern prototype_fdct(vp8_fast_fdct8x4_armv6);

 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6
+
+#undef  vp8_fdct_fast4x4
+#define vp8_fdct_fast4x4 vp8_fast_fdct4x4_armv6
+
+#undef  vp8_fdct_fast8x4
+#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_armv6
 #endif
-#endif
+
+#endif /* HAVE_ARMV6 */

 #if HAVE_ARMV7
 extern prototype_fdct(vp8_short_fdct4x4_neon);
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -808,7 +808,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
            vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);

            for (i = 0; i < cm->mb_rows; i++)
-                cpi->mt_current_mb_col[i] = 0;
+                cpi->mt_current_mb_col[i] = -1;

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -25,19 +25,6 @@
 #define intra4x4pbias_rate    256


-void vp8_update_mode_context(int *abmode, int *lbmode, int i, int best_mode)
-{
-    if (i < 12)
-    {
-        abmode[i+4] = best_mode;
-    }
-
-    if ((i & 3) != 3)
-    {
-        lbmode[i+1] = best_mode;
-    }
-
-}
 #if CONFIG_RUNTIME_CPU_DETECT
 #define IF_RTCD(x) (x)
 #else
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@@ -17,7 +17,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
 void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
 void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *, MACROBLOCK *mb);
 void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
-void vp8_update_mode_context(int *abmode, int *lbmode, int i, int best_mode);
 void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);

 #endif
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -104,7 +104,7 @@ static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
 }

-void vp8_build_dcblock(MACROBLOCK *x)
+static void build_dcblock(MACROBLOCK *x)
 {
    short *src_diff_ptr = &x->src_diff[384];
    int i;
@@ -138,7 +138,7 @@ void vp8_transform_intra_mby(MACROBLOCK *x)
    }

    // build dc block from 16 y dc values
-    vp8_build_dcblock(x);
+    build_dcblock(x);

    // do 2nd order transform on the dc block
    x->short_walsh4x4(&x->block[24].src_diff[0],
@@ -147,7 +147,7 @@ void vp8_transform_intra_mby(MACROBLOCK *x)
 }


-void vp8_transform_mb(MACROBLOCK *x)
+static void transform_mb(MACROBLOCK *x)
 {
    int i;

@@ -159,7 +159,7 @@ void vp8_transform_mb(MACROBLOCK *x)

    // build dc block from 16 y dc values
    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
-        vp8_build_dcblock(x);
+        build_dcblock(x);

    for (i = 16; i < 24; i += 2)
    {
@@ -174,7 +174,8 @@ void vp8_transform_mb(MACROBLOCK *x)

 }

-void vp8_transform_mby(MACROBLOCK *x)
+
+static void transform_mby(MACROBLOCK *x)
 {
    int i;

@@ -187,7 +188,7 @@ void vp8_transform_mby(MACROBLOCK *x)
    // build dc block from 16 y dc values
    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
    {
-        vp8_build_dcblock(x);
+        build_dcblock(x);
        x->short_walsh4x4(&x->block[24].src_diff[0],
            &x->block[24].coeff[0], 8);
    }
@@ -255,9 +256,9 @@ static const int plane_rd_mult[4]=
    Y1_RD_MULT
 };

-void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
-                    ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
-                    const VP8_ENCODER_RTCD *rtcd)
+static void optimize_b(MACROBLOCK *mb, int ib, int type,
+                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+                       const VP8_ENCODER_RTCD *rtcd)
 {
    BLOCK *b;
    BLOCKD *d;
@@ -501,7 +502,7 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
    *a = *l = (d->eob != !type);
 }

-void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
+static void optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
    int b;
    int type;
@@ -522,20 +523,20 @@ void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)

    for (b = 0; b < 16; b++)
    {
-        vp8_optimize_b(x, b, type,
+        optimize_b(x, b, type,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

    for (b = 16; b < 24; b++)
    {
-        vp8_optimize_b(x, b, PLANE_TYPE_UV,
+        optimize_b(x, b, PLANE_TYPE_UV,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

    if (has_2nd_order)
    {
        b=24;
-        vp8_optimize_b(x, b, PLANE_TYPE_Y2,
+        optimize_b(x, b, PLANE_TYPE_Y2,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }
 }
@@ -569,7 +570,7 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)

    for (b = 0; b < 16; b++)
    {
-        vp8_optimize_b(x, b, type,
+        optimize_b(x, b, type,
        ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

@@ -577,7 +578,7 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
    if (has_2nd_order)
    {
        b=24;
-        vp8_optimize_b(x, b, PLANE_TYPE_Y2,
+        optimize_b(x, b, PLANE_TYPE_Y2,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }
 }
@@ -603,7 +604,7 @@ void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)

    for (b = 16; b < 24; b++)
    {
-        vp8_optimize_b(x, b, PLANE_TYPE_UV,
+        optimize_b(x, b, PLANE_TYPE_UV,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }
 }
@@ -615,13 +616,13 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)

    vp8_subtract_mb(rtcd, x);

-    vp8_transform_mb(x);
+    transform_mb(x);

    vp8_quantize_mb(x);

 #if !(CONFIG_REALTIME_ONLY)
    if (x->optimize)
-        vp8_optimize_mb(x, rtcd);
+        optimize_mb(x, rtcd);
 #endif

    vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
@@ -638,7 +639,7 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)

    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);

-    vp8_transform_mby(x);
+    transform_mby(x);

    vp8_quantize_mby(x);

@@ -649,22 +650,6 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 }


-void vp8_encode_inter16x16uv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
-{
-    vp8_build_inter_predictors_mbuv(&x->e_mbd);
-
-    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
-
-    vp8_transform_mbuv(x);
-
-    vp8_quantize_mbuv(x);
-
-    vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
-
-    vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
-}
-
-
 void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
    vp8_build_inter_predictors_mbuv(&x->e_mbd);
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -101,9 +101,6 @@ void vp8_build_dcblock(MACROBLOCK *b);
 void vp8_transform_mb(MACROBLOCK *mb);
 void vp8_transform_mbuv(MACROBLOCK *x);
 void vp8_transform_intra_mby(MACROBLOCK *x);
-void Encode16x16Y(MACROBLOCK *x);
-void Encode16x16UV(MACROBLOCK *x);
-void vp8_encode_inter16x16uv(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
 void vp8_encode_inter16x16uvrd(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
 void vp8_optimize_mby(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
 void vp8_optimize_mbuv(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -24,8 +24,6 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
 extern void vp8_build_block_offsets(MACROBLOCK *x);
 extern void vp8_setup_block_ptrs(MACROBLOCK *x);

-#if CONFIG_MULTITHREAD
-
 extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);

 static THREAD_FUNCTION loopfilter_thread(void *p_data)
@@ -51,7 +49,6 @@ static THREAD_FUNCTION loopfilter_thread(void *p_data)

    return 0;
 }
-#endif

 static
 THREAD_FUNCTION thread_encoding_proc(void *p_data)
@@ -458,53 +455,58 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,

 void vp8cx_create_encoder_threads(VP8_COMP *cpi)
 {
-    cpi->b_multi_threaded = 0;
+    const VP8_COMMON * cm = &cpi->common;

+    cpi->b_multi_threaded = 0;
+    cpi->encoding_thread_count = 0;
    cpi->processor_core_count = 32; //vp8_get_proc_core_count();

    if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
    {
        int ithread;
+        int th_count = cpi->oxcf.multi_threaded - 1;

        if (cpi->oxcf.multi_threaded > cpi->processor_core_count)
-            cpi->encoding_thread_count = cpi->processor_core_count - 1;
-        else
-            cpi->encoding_thread_count = cpi->oxcf.multi_threaded - 1;
+            th_count = cpi->processor_core_count - 1;

-        CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * cpi->encoding_thread_count));
-        CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * cpi->encoding_thread_count));
-        CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count));
-        vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count);
-        CHECK_MEM_ERROR(cpi->en_thread_data, vpx_malloc(sizeof(ENCODETHREAD_DATA) * cpi->encoding_thread_count));
-        CHECK_MEM_ERROR(cpi->mt_current_mb_col, vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cpi->common.mb_rows));
+        /* we have th_count + 1 (main) threads processing one row each */
+        /* no point to have more threads than the sync range allows */
+        if(th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1))
+        {
+            th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
+        }
+
+        if(th_count == 0)
+            return;
+
+        CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count));
+        CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count));
+        CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
+        vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
+        CHECK_MEM_ERROR(cpi->en_thread_data,
+                        vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
+        CHECK_MEM_ERROR(cpi->mt_current_mb_col,
+                        vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));

-        //cpi->h_event_main = CreateEvent(NULL, FALSE, FALSE, NULL);
        sem_init(&cpi->h_event_end_encoding, 0, 0);

        cpi->b_multi_threaded = 1;
+        cpi->encoding_thread_count = th_count;

-        //printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n", (cpi->encoding_thread_count +1));
+        /*
+        printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n",
+               (cpi->encoding_thread_count +1));
+        */

-        for (ithread = 0; ithread < cpi->encoding_thread_count; ithread++)
+        for (ithread = 0; ithread < th_count; ithread++)
        {
            ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread];

-            //cpi->h_event_mbrencoding[ithread] = CreateEvent(NULL, FALSE, FALSE, NULL);
            sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);
            ethd->ithread = ithread;
            ethd->ptr1 = (void *)cpi;
            ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];

-            //printf(" call begin thread %d \n", ithread);
-
-            //cpi->h_encoding_thread[ithread] =   (HANDLE)_beginthreadex(
-            //  NULL,           // security
-            //  0,              // stksize
-            //  thread_encoding_proc,
-            //  (&cpi->en_thread_data[ithread]),          // Thread data
-            //  0,
-            //  NULL);
-
            pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd);
        }

--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -67,7 +67,7 @@ static int vscale_lookup[7] = {0, 1, 1, 2, 2, 3, 3};
 static int hscale_lookup[7] = {0, 0, 1, 1, 2, 2, 3};


-const int cq_level[QINDEX_RANGE] =
+static const int cq_level[QINDEX_RANGE] =
 {
    0,0,1,1,2,3,3,4,4,5,6,6,7,8,8,9,
    9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,20,
@@ -79,10 +79,9 @@ const int cq_level[QINDEX_RANGE] =
    86,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
 };

-void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame);
-int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps);
+static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame);

-int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
+static int encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
 {

    int i;
@@ -146,7 +145,7 @@ static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
    /*start_pos = cpi->stats_in;
    sum_iiratio = 0.0;
    i = 0;
-    while ( (i < 1) && vp8_input_stats(cpi,&next_frame) != EOF )
+    while ( (i < 1) && input_stats(cpi,&next_frame) != EOF )
    {

        next_iiratio = next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error);
@@ -212,7 +211,7 @@ static const double weight_table[256] = {
 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000
 };

-double vp8_simple_weight(YV12_BUFFER_CONFIG *source)
+static double simple_weight(YV12_BUFFER_CONFIG *source)
 {
    int i, j;

@@ -240,7 +239,7 @@ double vp8_simple_weight(YV12_BUFFER_CONFIG *source)


 // This function returns the current per frame maximum bitrate target
-int frame_max_bits(VP8_COMP *cpi)
+static int frame_max_bits(VP8_COMP *cpi)
 {
    // Max allocation for a single frame based on the max section guidelines passed in and how many bits are left
    int max_bits;
@@ -281,9 +280,9 @@ int frame_max_bits(VP8_COMP *cpi)
 }


-void vp8_output_stats(const VP8_COMP            *cpi,
-                      struct vpx_codec_pkt_list *pktlist,
-                      FIRSTPASS_STATS            *stats)
+static void output_stats(const VP8_COMP            *cpi,
+                         struct vpx_codec_pkt_list *pktlist,
+                         FIRSTPASS_STATS            *stats)
 {
    struct vpx_codec_cx_pkt pkt;
    pkt.kind = VPX_CODEC_STATS_PKT;
@@ -323,7 +322,7 @@ void vp8_output_stats(const VP8_COMP            *cpi,
 #endif
 }

-int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps)
+static int input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps)
 {
    if (cpi->stats_in >= cpi->stats_in_end)
        return EOF;
@@ -333,7 +332,7 @@ int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps)
    return 1;
 }

-void vp8_zero_stats(FIRSTPASS_STATS *section)
+static void zero_stats(FIRSTPASS_STATS *section)
 {
    section->frame      = 0.0;
    section->intra_error = 0.0;
@@ -353,7 +352,7 @@ void vp8_zero_stats(FIRSTPASS_STATS *section)
    section->count      = 0.0;
    section->duration   = 1.0;
 }
-void vp8_accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
+static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
 {
    section->frame += frame->frame;
    section->intra_error += frame->intra_error;
@@ -373,7 +372,7 @@ void vp8_accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
    section->count      += frame->count;
    section->duration   += frame->duration;
 }
-void vp8_avg_stats(FIRSTPASS_STATS *section)
+static void avg_stats(FIRSTPASS_STATS *section)
 {
    if (section->count < 1.0)
        return;
@@ -397,15 +396,15 @@ void vp8_avg_stats(FIRSTPASS_STATS *section)

 void vp8_init_first_pass(VP8_COMP *cpi)
 {
-    vp8_zero_stats(cpi->total_stats);
+    zero_stats(cpi->total_stats);
 }

 void vp8_end_first_pass(VP8_COMP *cpi)
 {
-    vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats);
+    output_stats(cpi, cpi->output_pkt_list, cpi->total_stats);
 }

-void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
+static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
 {
    MACROBLOCKD * const xd = & x->e_mbd;
    BLOCK *b = &x->block[0];
@@ -424,7 +423,7 @@ void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * r
    VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16) ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err));
 }

-void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset )
+static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset )
 {
    MACROBLOCKD *const xd = & x->e_mbd;
    BLOCK *b = &x->block[0];
@@ -575,7 +574,7 @@ void vp8_first_pass(VP8_COMP *cpi)
            xd->left_available = (mb_col != 0);

            // do intra 16x16 prediction
-            this_error = vp8_encode_intra(cpi, x, use_dc_pred);
+            this_error = encode_intra(cpi, x, use_dc_pred);

            // "intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame)
            // We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate fot the mv.
@@ -600,13 +599,13 @@ void vp8_first_pass(VP8_COMP *cpi)
                int motion_error = INT_MAX;

                // Simple 0,0 motion with no mv overhead
-                vp8_zz_motion_search( cpi, x, lst_yv12, &motion_error, recon_yoffset );
+                zz_motion_search( cpi, x, lst_yv12, &motion_error, recon_yoffset );
                d->bmi.mv.as_mv.row = 0;
                d->bmi.mv.as_mv.col = 0;

                // Test last reference frame using the previous best mv as the
                // starting point (best reference) for the search
-                vp8_first_pass_motion_search(cpi, x, &best_ref_mv.as_mv,
+                first_pass_motion_search(cpi, x, &best_ref_mv.as_mv,
                                        &d->bmi.mv.as_mv, lst_yv12,
                                        &motion_error, recon_yoffset);

@@ -614,7 +613,7 @@ void vp8_first_pass(VP8_COMP *cpi)
                if (best_ref_mv.as_int)
                {
                   tmp_err = INT_MAX;
-                   vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv,
+                   first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv,
                                     lst_yv12, &tmp_err, recon_yoffset);

                   if ( tmp_err < motion_error )
@@ -628,7 +627,7 @@ void vp8_first_pass(VP8_COMP *cpi)
                // Experimental search in a second reference frame ((0,0) based only)
                if (cm->current_video_frame > 1)
                {
-                    vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset);
+                    first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset);

                    if ((gf_motion_error < motion_error) && (gf_motion_error < this_error))
                    {
@@ -752,7 +751,7 @@ void vp8_first_pass(VP8_COMP *cpi)
        fps.frame      = cm->current_video_frame ;
        fps.intra_error = intra_error >> 8;
        fps.coded_error = coded_error >> 8;
-        weight = vp8_simple_weight(cpi->Source);
+        weight = simple_weight(cpi->Source);


        if (weight < 0.1)
@@ -796,8 +795,8 @@ void vp8_first_pass(VP8_COMP *cpi)
        memcpy(cpi->this_frame_stats,
               &fps,
               sizeof(FIRSTPASS_STATS));
-        vp8_output_stats(cpi, cpi->output_pkt_list, cpi->this_frame_stats);
-        vp8_accumulate_stats(cpi->total_stats, &fps);
+        output_stats(cpi, cpi->output_pkt_list, cpi->this_frame_stats);
+        accumulate_stats(cpi->total_stats, &fps);
    }

    // Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met
@@ -1168,7 +1167,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)

    double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);

-    vp8_zero_stats(cpi->total_stats);
+    zero_stats(cpi->total_stats);

    if (!cpi->stats_in_end)
        return;
@@ -1202,7 +1201,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
    cpi->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
    cpi->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;

-    vp8_avg_stats(cpi->total_stats);
+    avg_stats(cpi->total_stats);

    // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence
    {
@@ -1211,7 +1210,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)

        start_pos = cpi->stats_in;               // Note starting "file" position

-        while (vp8_input_stats(cpi, &this_frame) != EOF)
+        while (input_stats(cpi, &this_frame) != EOF)
        {
            IIRatio = this_frame.intra_error / DOUBLE_DIVIDE_CHECK(this_frame.coded_error);
            IIRatio = (IIRatio < 1.0) ? 1.0 : (IIRatio > 20.0) ? 20.0 : IIRatio;
@@ -1232,7 +1231,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
        cpi->modified_error_total = 0.0;
        cpi->modified_error_used = 0.0;

-        while (vp8_input_stats(cpi, &this_frame) != EOF)
+        while (input_stats(cpi, &this_frame) != EOF)
        {
            cpi->modified_error_total += calculate_modified_err(cpi, &this_frame);
        }
@@ -1255,7 +1254,7 @@ void vp8_end_second_pass(VP8_COMP *cpi)

 // This function gives and estimate of how badly we believe
 // the prediction quality is decaying from frame to frame.
-double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
+static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
 {
    double prediction_decay_rate;
    double motion_decay;
@@ -1293,7 +1292,7 @@ double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
 // Function to test for a condition where a complex transition is followed
 // by a static section. For example in slide shows where there is a fade
 // between slides. This is to help with more optimal kf and gf positioning.
-BOOL detect_transition_to_still(
+static int detect_transition_to_still(
    VP8_COMP *cpi,
    int frame_interval,
    int still_interval,
@@ -1318,7 +1317,7 @@ BOOL detect_transition_to_still(
        // persists...
        for ( j = 0; j < still_interval; j++ )
        {
-            if (EOF == vp8_input_stats(cpi, &tmp_next_frame))
+            if (EOF == input_stats(cpi, &tmp_next_frame))
                break;

            decay_rate = get_prediction_decay_rate(cpi, &tmp_next_frame);
@@ -1417,7 +1416,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
        mod_err_per_mb_accumulator +=
            mod_frame_err / DOUBLE_DIVIDE_CHECK((double)cpi->common.MBs);

-        if (EOF == vp8_input_stats(cpi, &next_frame))
+        if (EOF == input_stats(cpi, &next_frame))
            break;

        // Accumulate motion stats.
@@ -1691,7 +1690,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
        {
            while (cpi->baseline_gf_interval < cpi->frames_to_key)
            {
-                if (EOF == vp8_input_stats(cpi, this_frame))
+                if (EOF == input_stats(cpi, this_frame))
                    break;

                cpi->baseline_gf_interval++;
@@ -1870,16 +1869,16 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
        FIRSTPASS_STATS sectionstats;
        double Ratio;

-        vp8_zero_stats(&sectionstats);
+        zero_stats(&sectionstats);
        reset_fpf_position(cpi, start_pos);

        for (i = 0 ; i < cpi->baseline_gf_interval ; i++)
        {
-            vp8_input_stats(cpi, &next_frame);
-            vp8_accumulate_stats(&sectionstats, &next_frame);
+            input_stats(cpi, &next_frame);
+            accumulate_stats(&sectionstats, &next_frame);
        }

-        vp8_avg_stats(&sectionstats);
+        avg_stats(&sectionstats);

        cpi->section_intra_rating =
            sectionstats.intra_error /
@@ -1977,7 +1976,7 @@ void vp8_second_pass(VP8_COMP *cpi)

    vp8_clear_system_state();

-    if (EOF == vp8_input_stats(cpi, &this_frame))
+    if (EOF == input_stats(cpi, &this_frame))
        return;

    this_frame_error = this_frame.ssim_weighted_pred_err;
@@ -1998,7 +1997,7 @@ void vp8_second_pass(VP8_COMP *cpi)
    {
        // Define next KF group and assign bits to it
        vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
-        vp8_find_next_key_frame(cpi, &this_frame_copy);
+        find_next_key_frame(cpi, &this_frame_copy);

        // Special case: Error error_resilient_mode mode does not make much sense for two pass but with its current meaning but this code is designed to stop
        // outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups.
@@ -2239,7 +2238,7 @@ static BOOL test_candidate_kf(VP8_COMP *cpi,  FIRSTPASS_STATS *last_frame, FIRST
            old_boost_score = boost_score;

            // Get the next frame details
-            if (EOF == vp8_input_stats(cpi, &local_next_frame))
+            if (EOF == input_stats(cpi, &local_next_frame))
                break;
        }

@@ -2257,7 +2256,7 @@ static BOOL test_candidate_kf(VP8_COMP *cpi,  FIRSTPASS_STATS *last_frame, FIRST

    return is_viable_kf;
 }
-void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
+static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 {
    int i,j;
    FIRSTPASS_STATS last_frame;
@@ -2317,7 +2316,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)

        // load a the next frame's stats
        vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
-        vp8_input_stats(cpi, this_frame);
+        input_stats(cpi, this_frame);

        // Provided that we are not at the end of the file...
        if (cpi->oxcf.auto_key
@@ -2395,7 +2394,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
            kf_group_coded_err += tmp_frame.coded_error;

            // Load a the next frame's stats
-            vp8_input_stats(cpi, &tmp_frame);
+            input_stats(cpi, &tmp_frame);
        }

        // Reset to the start of the group
@@ -2500,7 +2499,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
        double motion_decay;
        double motion_pct;

-        if (EOF == vp8_input_stats(cpi, &next_frame))
+        if (EOF == input_stats(cpi, &next_frame))
            break;

        if (next_frame.intra_error > cpi->kf_intra_err_min)
@@ -2535,16 +2534,16 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
        FIRSTPASS_STATS sectionstats;
        double Ratio;

-        vp8_zero_stats(&sectionstats);
+        zero_stats(&sectionstats);
        reset_fpf_position(cpi, start_position);

        for (i = 0 ; i < cpi->frames_to_key ; i++)
        {
-            vp8_input_stats(cpi, &next_frame);
-            vp8_accumulate_stats(&sectionstats, &next_frame);
+            input_stats(cpi, &next_frame);
+            accumulate_stats(&sectionstats, &next_frame);
        }

-        vp8_avg_stats(&sectionstats);
+        avg_stats(&sectionstats);

         cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);

--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -17,8 +17,6 @@
 void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
 void vp8_arch_arm_encoder_init(VP8_COMP *cpi);

-
-void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
 extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d);

 void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -43,7 +43,7 @@ int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight)
    return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7;
 }

-int vp8_mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit)
+static int mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit)
 {
    //int i;
    //return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8;
@@ -221,7 +221,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,

    // calculate central point error
    besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
-    besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
+    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    while (--halfiters)
@@ -337,13 +337,13 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,

    // calculate central point error
    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
-    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
+    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    // go left then right and check error
    this_mv.row = startmv.row;
    this_mv.col = ((startmv.col - 8) | 4);
    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
@@ -353,7 +353,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,

    this_mv.col += 8;
    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
@@ -365,7 +365,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
    this_mv.col = startmv.col;
    this_mv.row = ((startmv.row - 8) | 4);
    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
@@ -375,7 +375,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,

    this_mv.row += 8;
    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
@@ -415,7 +415,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
        break;
    }

-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
@@ -451,7 +451,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
        left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
    }

-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
@@ -461,7 +461,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,

    this_mv.col += 4;
    right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
@@ -483,7 +483,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
        up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    }

-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
@@ -493,7 +493,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,

    this_mv.row += 4;
    down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
@@ -582,7 +582,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
        break;
    }

-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
@@ -621,13 +621,13 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm

    // calculate central point error
    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
-    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
+    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    // go left then right and check error
    this_mv.row = startmv.row;
    this_mv.col = ((startmv.col - 8) | 4);
    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
@@ -637,7 +637,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm

    this_mv.col += 8;
    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
@@ -649,7 +649,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
    this_mv.col = startmv.col;
    this_mv.row = ((startmv.row - 8) | 4);
    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
@@ -659,7 +659,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm

    this_mv.row += 8;
    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
@@ -697,7 +697,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
        break;
    }

-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
@@ -709,7 +709,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
    this_mv.col = (this_mv.col - 8) | 4;
    this_mv.row = (this_mv.row - 8) | 4;
    diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
@@ -719,7 +719,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm

    this_mv.col += 8;
    diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
@@ -730,7 +730,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
    this_mv.col = (this_mv.col - 8) | 4;
    this_mv.row = startmv.row + 4;
    diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
@@ -740,7 +740,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm

    this_mv.col += 8;
    diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
@@ -894,7 +894,7 @@ cal_neighbors:
    best_mv->row = br;
    best_mv->col = bc;

-    return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + vp8_mv_err_cost(best_mv, center_mv, mvcost, error_per_bit) ;
+    return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + mv_err_cost(best_mv, center_mv, mvcost, error_per_bit) ;
 }
 #undef MVC
 #undef PRE
@@ -955,7 +955,7 @@ int vp8_diamond_search_sad
    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
    {
        // Check the starting position
-        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
+        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
    }

    // search_param determines the length of the initial step and hence the number of iterations
@@ -986,7 +986,7 @@ int vp8_diamond_search_sad
                {
                    this_mv.row = this_row_offset << 3;
                    this_mv.col = this_col_offset << 3;
-                    thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
+                    thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);

                    if (thissad < bestsad)
                    {
@@ -1017,7 +1017,7 @@ int vp8_diamond_search_sad
        return INT_MAX;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
-    + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+    + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
 }

 int vp8_diamond_search_sadx4
@@ -1071,7 +1071,7 @@ int vp8_diamond_search_sadx4
    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
    {
        // Check the starting position
-        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
+        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
    }

    // search_param determines the length of the initial step and hence the number of iterations
@@ -1113,7 +1113,7 @@ int vp8_diamond_search_sadx4
                    {
                        this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
                        this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
-                        sad_array[t] += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
+                        sad_array[t] += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);

                        if (sad_array[t] < bestsad)
                        {
@@ -1142,7 +1142,7 @@ int vp8_diamond_search_sadx4
                    {
                        this_mv.row = this_row_offset << 3;
                        this_mv.col = this_col_offset << 3;
-                        thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
+                        thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);

                        if (thissad < bestsad)
                        {
@@ -1173,7 +1173,7 @@ int vp8_diamond_search_sadx4
        return INT_MAX;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
-    + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+    + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
 }


@@ -1215,8 +1215,8 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
    {
        // Baseline value at the centre

-        //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
-        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
+        //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
+        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
    }

    // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
@@ -1242,9 +1242,9 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);

            this_mv.col = c << 3;
-            //thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
+            //thissad += (int)sqrt(mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
            //thissad  += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
-            thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
+            thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);

            if (thissad < bestsad)
            {
@@ -1263,7 +1263,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro

    if (bestsad < INT_MAX)
        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
-        + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+        + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
    else
        return INT_MAX;
 }
@@ -1306,7 +1306,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
    {
        // Baseline value at the centre
-        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
+        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
    }

    // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
@@ -1341,7 +1341,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
                if (thissad < bestsad)
                {
                    this_mv.col = c << 3;
-                    thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
+                    thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);

                    if (thissad < bestsad)
                    {
@@ -1364,7 +1364,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
            if (thissad < bestsad)
            {
                this_mv.col = c << 3;
-                thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
+                thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);

                if (thissad < bestsad)
                {
@@ -1386,7 +1386,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er

    if (bestsad < INT_MAX)
        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
-        + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+        + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
    else
        return INT_MAX;
 }
@@ -1430,7 +1430,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
    {
        // Baseline value at the centre
-        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
+        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
    }

    // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
@@ -1465,7 +1465,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
                if (thissad < bestsad)
                {
                    this_mv.col = c << 3;
-                    thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
+                    thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);

                    if (thissad < bestsad)
                    {
@@ -1494,7 +1494,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
                if (thissad < bestsad)
                {
                    this_mv.col = c << 3;
-                    thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
+                    thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);

                    if (thissad < bestsad)
                    {
@@ -1517,7 +1517,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
            if (thissad < bestsad)
            {
                this_mv.col = c << 3;
-                thissad  += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
+                thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);

                if (thissad < bestsad)
                {
@@ -1538,7 +1538,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er

    if (bestsad < INT_MAX)
        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
-        + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+        + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
    else
        return INT_MAX;
 }
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -70,7 +70,6 @@ extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_

 int vp8_estimate_entropy_savings(VP8_COMP *cpi);
 int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
-int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);

 extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi);

@@ -261,7 +260,7 @@ static void setup_features(VP8_COMP *cpi)
 }


-void vp8_dealloc_compressor_data(VP8_COMP *cpi)
+static void dealloc_compressor_data(VP8_COMP *cpi)
 {
    vpx_free(cpi->tplist);
    cpi->tplist = NULL;
@@ -1453,7 +1452,7 @@ rescale(int val, int num, int denom)
 }


-void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
+static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
 {
    VP8_COMP *cpi = (VP8_COMP *)(ptr);
    VP8_COMMON *cm = &cpi->common;
@@ -1844,7 +1843,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
    vp8_create_common(&cpi->common);
    vp8_cmachine_specific_config(cpi);

-    vp8_init_config((VP8_PTR)cpi, oxcf);
+    init_config((VP8_PTR)cpi, oxcf);

    memcpy(cpi->base_skip_false_prob, vp8cx_base_skip_false_prob, sizeof(vp8cx_base_skip_false_prob));
    cpi->common.current_video_frame   = 0;
@@ -2345,7 +2344,7 @@ void vp8_remove_compressor(VP8_PTR *ptr)
    vp8cx_remove_encoder_threads(cpi);
 #endif

-    vp8_dealloc_compressor_data(cpi);
+    dealloc_compressor_data(cpi);
    vpx_free(cpi->mb.ss);
    vpx_free(cpi->tok);
    vpx_free(cpi->cyclic_refresh_map);
@@ -4593,18 +4592,8 @@ static void encode_frame_to_data_rate

 }

-int vp8_is_gf_update_needed(VP8_PTR ptr)
-{
-    VP8_COMP *cpi = (VP8_COMP *) ptr;
-    int ret_val;

-    ret_val = cpi->gf_update_recommended;
-    cpi->gf_update_recommended = 0;
-
-    return ret_val;
-}
-
-void vp8_check_gf_quality(VP8_COMP *cpi)
+static void check_gf_quality(VP8_COMP *cpi)
 {
    VP8_COMMON *cm = &cpi->common;
    int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols);
@@ -4853,7 +4842,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
                if (start_frame < 0)
                    start_frame += cpi->oxcf.lag_in_frames;

-                besterr = vp8_calc_low_ss_err(&cpi->src_buffer[cpi->last_alt_ref_sei].source_buffer,
+                besterr = calc_low_ss_err(&cpi->src_buffer[cpi->last_alt_ref_sei].source_buffer,
                                              &cpi->src_buffer[start_frame].source_buffer, IF_RTCD(&cpi->rtcd.variance));

                for (i = 0; i < 7; i++)
@@ -4862,7 +4851,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
                    cpi->oxcf.arnr_strength = i;
                    vp8_temporal_filter_prepare_c(cpi);

-                    thiserr = vp8_calc_low_ss_err(&cpi->alt_ref_buffer.source_buffer,
+                    thiserr = calc_low_ss_err(&cpi->alt_ref_buffer.source_buffer,
                                                  &cpi->src_buffer[start_frame].source_buffer, IF_RTCD(&cpi->rtcd.variance));

                    if (10 * thiserr < besterr * 8)
@@ -5005,7 +4994,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon

    if (cpi->compressor_speed == 2)
    {
-        vp8_check_gf_quality(cpi);
+        check_gf_quality(cpi);
        vpx_usec_timer_start(&tsctimer);
        vpx_usec_timer_start(&ticktimer);
    }
@@ -5384,7 +5373,9 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const

    return Total;
 }
-int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd)
+
+
+static int calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd)
 {
    int i, j;
    int Total = 0;
@@ -5412,11 +5403,7 @@ int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, co
    return Total;
 }

-int vp8_get_speed(VP8_PTR c)
-{
-    VP8_COMP   *cpi = (VP8_COMP *) c;
-    return cpi->Speed;
-}
+
 int vp8_get_quantizer(VP8_PTR c)
 {
    VP8_COMP   *cpi = (VP8_COMP *) c;
--- a/vp8/encoder/psnr.c
+++ b/vp8/encoder/psnr.c
@@ -29,89 +29,3 @@ double vp8_mse2psnr(double Samples, double Peak, double Mse)

    return psnr;
 }
-
-double vp8_calc_psnr(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, double *YPsnr, double *UPsnr, double *VPsnr, double *sq_error)
-{
-    int i, j;
-    int Diff;
-    double frame_psnr;
-    double Total;
-    double grand_total;
-    unsigned char *src = source->y_buffer;
-    unsigned char *dst = dest->y_buffer;
-
-    Total = 0.0;
-    grand_total = 0.0;
-
-    // Loop throught the Y plane raw and reconstruction data summing (square differences)
-    for (i = 0; i < source->y_height; i++)
-    {
-
-        for (j = 0; j < source->y_width; j++)
-        {
-            Diff        = (int)(src[j]) - (int)(dst[j]);
-            Total      += Diff * Diff;
-        }
-
-        src += source->y_stride;
-        dst += dest->y_stride;
-    }
-
-    // Work out Y PSNR
-    *YPsnr = vp8_mse2psnr(source->y_height * source->y_width, 255.0, Total);
-    grand_total += Total;
-    Total = 0;
-
-
-    // Loop through the U plane
-    src = source->u_buffer;
-    dst = dest->u_buffer;
-
-    for (i = 0; i < source->uv_height; i++)
-    {
-
-        for (j = 0; j < source->uv_width; j++)
-        {
-            Diff        = (int)(src[j]) - (int)(dst[j]);
-            Total      += Diff * Diff;
-        }
-
-        src += source->uv_stride;
-        dst += dest->uv_stride;
-    }
-
-    // Work out U PSNR
-    *UPsnr = vp8_mse2psnr(source->uv_height * source->uv_width, 255.0, Total);
-    grand_total += Total;
-    Total = 0;
-
-
-    // V PSNR
-    src = source->v_buffer;
-    dst = dest->v_buffer;
-
-    for (i = 0; i < source->uv_height; i++)
-    {
-
-        for (j = 0; j < source->uv_width; j++)
-        {
-            Diff        = (int)(src[j]) - (int)(dst[j]);
-            Total      += Diff * Diff;
-        }
-
-        src += source->uv_stride;
-        dst += dest->uv_stride;
-    }
-
-    // Work out UV PSNR
-    *VPsnr = vp8_mse2psnr(source->uv_height * source->uv_width, 255.0, Total);
-    grand_total += Total;
-    Total = 0;
-
-    // Work out total PSNR
-    frame_psnr = vp8_mse2psnr(source->y_height * source->y_width * 3 / 2 , 255.0, grand_total);
-
-    *sq_error = 1.0 * grand_total;
-
-    return frame_psnr;
-}
--- a/vp8/encoder/psnr.h
+++ b/vp8/encoder/psnr.h
@@ -13,6 +13,5 @@
 #define __INC_PSNR_H

 extern double vp8_mse2psnr(double Samples, double Peak, double Mse);
-extern double vp8_calc_psnr(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, double *YPsnr, double *UPsnr, double *VPsnr, double *sq_error);

 #endif
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -90,7 +90,7 @@ const int vp8_bits_per_mb[2][QINDEX_RANGE] =
    }
 };

-const int vp8_kf_boost_qadjustment[QINDEX_RANGE] =
+static const int kf_boost_qadjustment[QINDEX_RANGE] =
 {
    128, 129, 130, 131, 132, 133, 134, 135,
    136, 137, 138, 139, 140, 141, 142, 143,
@@ -154,7 +154,7 @@ const int vp8_gf_boost_qadjustment[QINDEX_RANGE] =
 };
 */

-const int vp8_kf_gf_boost_qlimits[QINDEX_RANGE] =
+static const int kf_gf_boost_qlimits[QINDEX_RANGE] =
 {
    150, 155, 160, 165, 170, 175, 180, 185,
    190, 195, 200, 205, 210, 215, 220, 225,
@@ -175,14 +175,14 @@ const int vp8_kf_gf_boost_qlimits[QINDEX_RANGE] =
 };

 // % adjustment to target kf size based on seperation from previous frame
-const int vp8_kf_boost_seperationt_adjustment[16] =
+static const int kf_boost_seperation_adjustment[16] =
 {
    30,   40,   50,   55,   60,   65,   70,   75,
    80,   85,   90,   95,  100,  100,  100,  100,
 };


-const int vp8_gf_adjust_table[101] =
+static const int gf_adjust_table[101] =
 {
    100,
    115, 130, 145, 160, 175, 190, 200, 210, 220, 230,
@@ -197,13 +197,13 @@ const int vp8_gf_adjust_table[101] =
    400, 400, 400, 400, 400, 400, 400, 400, 400, 400,
 };

-const int vp8_gf_intra_useage_adjustment[20] =
+static const int gf_intra_usage_adjustment[20] =
 {
    125, 120, 115, 110, 105, 100,  95,  85,  80,  75,
    70,  65,  60,  55,  50,  50,  50,  50,  50,  50,
 };

-const int vp8_gf_interval_table[101] =
+static const int gf_interval_table[101] =
 {
    7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
@@ -353,7 +353,7 @@ void vp8_calc_auto_iframe_target_size(VP8_COMP *cpi)
        kf_boost = (int)(2 * cpi->output_frame_rate - 16);

        // adjustment up based on q
-        kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100;
+        kf_boost = kf_boost * kf_boost_qadjustment[cpi->ni_av_qi] / 100;

        // frame separation adjustment ( down)
        if (cpi->frames_since_key  < cpi->output_frame_rate / 2)
@@ -488,10 +488,10 @@ static void calc_gf_params(VP8_COMP *cpi)
            Boost = GFQ_ADJUSTMENT;

            // Adjust based upon most recently measure intra useage
-            Boost = Boost * vp8_gf_intra_useage_adjustment[(cpi->this_frame_percent_intra < 15) ? cpi->this_frame_percent_intra : 14] / 100;
+            Boost = Boost * gf_intra_usage_adjustment[(cpi->this_frame_percent_intra < 15) ? cpi->this_frame_percent_intra : 14] / 100;

            // Adjust gf boost based upon GF usage since last GF
-            Boost = Boost * vp8_gf_adjust_table[gf_frame_useage] / 100;
+            Boost = Boost * gf_adjust_table[gf_frame_useage] / 100;
 #endif
        }

@@ -503,8 +503,8 @@ static void calc_gf_params(VP8_COMP *cpi)
        }

        // Apply an upper limit based on Q for 1 pass encodes
-        if (Boost > vp8_kf_gf_boost_qlimits[Q] && (cpi->pass == 0))
-            Boost = vp8_kf_gf_boost_qlimits[Q];
+        if (Boost > kf_gf_boost_qlimits[Q] && (cpi->pass == 0))
+            Boost = kf_gf_boost_qlimits[Q];

        // Apply lower limits to boost.
        else if (Boost < 110)
@@ -539,8 +539,8 @@ static void calc_gf_params(VP8_COMP *cpi)
            if (cpi->last_boost >= 1500)
                cpi->frames_till_gf_update_due ++;

-            if (vp8_gf_interval_table[gf_frame_useage] > cpi->frames_till_gf_update_due)
-                cpi->frames_till_gf_update_due = vp8_gf_interval_table[gf_frame_useage];
+            if (gf_interval_table[gf_frame_useage] > cpi->frames_till_gf_update_due)
+                cpi->frames_till_gf_update_due = gf_interval_table[gf_frame_useage];

            if (cpi->frames_till_gf_update_due > cpi->max_gf_interval)
                cpi->frames_till_gf_update_due = cpi->max_gf_interval;
@@ -594,17 +594,17 @@ void vp8_calc_iframe_target_size(VP8_COMP *cpi)
        // between key frames.

        // Adjust boost based upon ambient Q
-        Boost = vp8_kf_boost_qadjustment[Q];
+        Boost = kf_boost_qadjustment[Q];

        // Make the Key frame boost less if the seperation from the previous key frame is small
        if (cpi->frames_since_key < 16)
-            Boost = Boost * vp8_kf_boost_seperationt_adjustment[cpi->frames_since_key] / 100;
+            Boost = Boost * kf_boost_seperation_adjustment[cpi->frames_since_key] / 100;
        else
-            Boost = Boost * vp8_kf_boost_seperationt_adjustment[15] / 100;
+            Boost = Boost * kf_boost_seperation_adjustment[15] / 100;

        // Apply limits on boost
-        if (Boost > vp8_kf_gf_boost_qlimits[Q])
-            Boost = vp8_kf_gf_boost_qlimits[Q];
+        if (Boost > kf_gf_boost_qlimits[Q])
+            Boost = kf_gf_boost_qlimits[Q];
        else if (Boost < 120)
            Boost = 120;
    }
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -53,7 +53,7 @@ extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);



-const int vp8_auto_speed_thresh[17] =
+static const int auto_speed_thresh[17] =
 {
    1000,
    200,
@@ -353,7 +353,7 @@ void vp8_auto_select_speed(VP8_COMP *cpi)
                }
            }

-            if (milliseconds_for_compress * 100 > cpi->avg_encode_time * vp8_auto_speed_thresh[cpi->Speed])
+            if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed])
            {
                cpi->Speed          -= 1;
                cpi->avg_pick_mode_time = 0;
@@ -1000,13 +1000,6 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels
    return distortion;
 }

-unsigned char vp8_mbsplit_offset2[4][16] = {
-    { 0,  8,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
-    { 0,  2,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
-    { 0,  2,  8, 10,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
-    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}
-};
-

 static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};

@@ -1034,8 +1027,8 @@ typedef struct
 } BEST_SEG_INFO;


-void vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
-                         unsigned int segmentation)
+static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
+                             BEST_SEG_INFO *bsi, unsigned int segmentation)
 {
    int i;
    int const *labels;
@@ -1153,7 +1146,7 @@ void vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
                    int sadpb = x->sadperbit4;

                    // find first label
-                    n = vp8_mbsplit_offset2[segmentation][i];
+                    n = vp8_mbsplit_offset[segmentation][i];

                    c = &x->block[n];
                    e = &x->e_mbd.block[n];
@@ -1332,16 +1325,16 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
    {
        /* for now, we will keep the original segmentation order
           when in best quality mode */
-        vp8_rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
-        vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
-        vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
-        vp8_rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
+        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
+        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
+        rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
+        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
    }
    else
    {
        int sr;

-        vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
+        rd_check_segment(cpi, x, &bsi, BLOCK_8X8);

        if (bsi.segment_rd < best_rd)
        {
@@ -1380,7 +1373,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
                sr = MAXF((abs(bsi.sv_mvp[1].row - bsi.sv_mvp[3].row))>>3, (abs(bsi.sv_mvp[1].col - bsi.sv_mvp[3].col))>>3);
                vp8_cal_step_param(sr, &bsi.sv_istep[1]);

-                vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
+                rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
            }

            /* block 16X8 */
@@ -1391,7 +1384,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
                sr = MAXF((abs(bsi.sv_mvp[2].row - bsi.sv_mvp[3].row))>>3, (abs(bsi.sv_mvp[2].col - bsi.sv_mvp[3].col))>>3);
                vp8_cal_step_param(sr, &bsi.sv_istep[1]);

-                vp8_rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
+                rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
            }

            /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
@@ -1399,7 +1392,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
            if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)  /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
            {
                bsi.mvp = &bsi.sv_mvp[0];
-                vp8_rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
+                rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
            }

            /* restore UMV window */
@@ -1432,7 +1425,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
    {
        int j;

-        j = vp8_mbsplit_offset2[bsi.segment_num][i];
+        j = vp8_mbsplit_offset[bsi.segment_num][i];

        x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode;
        x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -26,9 +26,9 @@ _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef
 void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
 void vp8_fix_contexts(MACROBLOCKD *x);

-TOKENVALUE vp8_dct_value_tokens[DCT_MAX_VALUE*2];
+static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE*2];
 const TOKENVALUE *vp8_dct_value_tokens_ptr;
-int vp8_dct_value_cost[DCT_MAX_VALUE*2];
+static int dct_value_cost[DCT_MAX_VALUE*2];
 const int *vp8_dct_value_cost_ptr;
 #if 0
 int skip_true_count = 0;
@@ -37,7 +37,7 @@ int skip_false_count = 0;
 static void fill_value_tokens()
 {

-    TOKENVALUE *const t = vp8_dct_value_tokens + DCT_MAX_VALUE;
+    TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE;
    vp8_extra_bit_struct *const e = vp8_extra_bits;

    int i = -DCT_MAX_VALUE;
@@ -81,7 +81,7 @@ static void fill_value_tokens()
                    cost += vp8_treed_cost(p->tree, p->prob, extra >> 1, Length);

                cost += vp8_cost_bit(vp8_prob_half, extra & 1); /* sign */
-                vp8_dct_value_cost[i + DCT_MAX_VALUE] = cost;
+                dct_value_cost[i + DCT_MAX_VALUE] = cost;
            }

        }
@@ -89,8 +89,8 @@ static void fill_value_tokens()
    }
    while (++i < DCT_MAX_VALUE);

-    vp8_dct_value_tokens_ptr = vp8_dct_value_tokens + DCT_MAX_VALUE;
-    vp8_dct_value_cost_ptr   = vp8_dct_value_cost + DCT_MAX_VALUE;
+    vp8_dct_value_tokens_ptr = dct_value_tokens + DCT_MAX_VALUE;
+    vp8_dct_value_cost_ptr   = dct_value_cost + DCT_MAX_VALUE;
 }

 static void tokenize2nd_order_b
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -10,33 +10,8 @@


 #include "variance.h"
+#include "vp8/common/filter.h"

-const int vp8_six_tap[8][6] =
-{
-    { 0,  0,  128,    0,   0,  0 },         // note that 1/8 pel positions are just as per alpha -0.5 bicubic
-    { 0, -6,  123,   12,  -1,  0 },
-    { 2, -11, 108,   36,  -8,  1 },         // New 1/4 pel 6 tap filter
-    { 0, -9,   93,   50,  -6,  0 },
-    { 3, -16,  77,   77, -16,  3 },         // New 1/2 pel 6 tap filter
-    { 0, -6,   50,   93,  -9,  0 },
-    { 1, -8,   36,  108, -11,  2 },         // New 1/4 pel 6 tap filter
-    { 0, -1,   12,  123,  -6,  0 }
-};
-
-
-const int VP8_FILTER_WEIGHT = 128;
-const int VP8_FILTER_SHIFT  =   7;
-const int vp8_bilinear_taps[8][2] =
-{
-    { 128,   0 },
-    { 112,  16 },
-    {  96,  32 },
-    {  80,  48 },
-    {  64,  64 },
-    {  48,  80 },
-    {  32,  96 },
-    {  16, 112 }
-};

 unsigned int vp8_get_mb_ss_c
 (
@@ -56,7 +31,7 @@ unsigned int vp8_get_mb_ss_c
 }


-void  vp8_variance(
+static void variance(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
@@ -98,7 +73,7 @@ vp8_get8x8var_c
 )
 {

-    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);
+    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);
    return (*SSE - (((*Sum) * (*Sum)) >> 6));
 }

@@ -114,7 +89,7 @@ vp8_get16x16var_c
 )
 {

-    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);
+    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);
    return (*SSE - (((*Sum) * (*Sum)) >> 8));

 }
@@ -132,7 +107,7 @@ unsigned int vp8_variance16x16_c(
    int avg;


-    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
+    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 8));
 }
@@ -148,7 +123,7 @@ unsigned int vp8_variance8x16_c(
    int avg;


-    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
+    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 7));
 }
@@ -164,7 +139,7 @@ unsigned int vp8_variance16x8_c(
    int avg;


-    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
+    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 7));
 }
@@ -181,7 +156,7 @@ unsigned int vp8_variance8x8_c(
    int avg;


-    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
+    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 6));
 }
@@ -197,7 +172,7 @@ unsigned int vp8_variance4x4_c(
    int avg;


-    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
+    variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 4));
 }
@@ -213,7 +188,7 @@ unsigned int vp8_mse16x16_c(
    unsigned int var;
    int avg;

-    vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
+    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    *sse = var;
    return var;
 }
@@ -247,7 +222,7 @@ unsigned int vp8_mse16x16_c(
 *                  to the next.
 *
 ****************************************************************************/
-void vp8e_filter_block2d_bil_first_pass
+static void var_filter_block2d_bil_first_pass
 (
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
@@ -255,7 +230,7 @@ void vp8e_filter_block2d_bil_first_pass
    int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
-    const int *vp8_filter
+    const short *vp8_filter
 )
 {
    unsigned int i, j;
@@ -305,7 +280,7 @@ void vp8e_filter_block2d_bil_first_pass
 *                  to the next.
 *
 ****************************************************************************/
-void vp8e_filter_block2d_bil_second_pass
+static void var_filter_block2d_bil_second_pass
 (
    const unsigned short *src_ptr,
    unsigned char  *output_ptr,
@@ -313,7 +288,7 @@ void vp8e_filter_block2d_bil_second_pass
    unsigned int  pixel_step,
    unsigned int  output_height,
    unsigned int  output_width,
-    const int *vp8_filter
+    const short *vp8_filter
 )
 {
    unsigned int  i, j;
@@ -338,52 +313,6 @@ void vp8e_filter_block2d_bil_second_pass
 }


-/****************************************************************************
- *
- *  ROUTINE       : filter_block2d_bil
- *
- *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
- *                  UINT32 src_pixels_per_line : Stride of input block.
- *                  INT32  *HFilter         : Array of 2 horizontal filter taps.
- *                  INT32  *VFilter         : Array of 2 vertical filter taps.
- *
- *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.
- *
- *  RETURNS       : void
- *
- *  FUNCTION      : 2-D filters an 8x8 input block by applying a 2-tap
- *                  bi-linear filter horizontally followed by a 2-tap
- *                  bi-linear filter vertically on the result.
- *
- *  SPECIAL NOTES : The intermediate horizontally filtered block must produce
- *                  1 more point than the input block in each column. This
- *                  is to ensure that the 2-tap filter has one extra data-point
- *                  at the top of each column so filter taps do not extend
- *                  beyond data. Thus the output of the first stage filter
- *                  is an 8x9 (hx_v) block.
- *
- ****************************************************************************/
-void vp8e_filter_block2d_bil
-(
-    const unsigned char  *src_ptr,
-    unsigned char *output_ptr,
-    unsigned int src_pixels_per_line,
-    int  *HFilter,
-    int  *VFilter
-)
-{
-
-    unsigned short FData[20*16];    // Temp data bufffer used in filtering
-
-    // First filter 1-D horizontally...
-    vp8e_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, 9, 8, HFilter);
-
-    // then 1-D vertically...
-    vp8e_filter_block2d_bil_second_pass(FData, output_ptr, 8, 8, 8, 8, VFilter);
-}
-
-
-
 unsigned int vp8_sub_pixel_variance4x4_c
 (
    const unsigned char  *src_ptr,
@@ -396,17 +325,17 @@ unsigned int vp8_sub_pixel_variance4x4_c
 )
 {
    unsigned char  temp2[20*16];
-    const int *HFilter, *VFilter;
+    const short *HFilter, *VFilter;
    unsigned short FData3[5*4]; // Temp data bufffer used in filtering

-    HFilter = vp8_bilinear_taps[xoffset];
-    VFilter = vp8_bilinear_taps[yoffset];
+    HFilter = vp8_bilinear_filters[xoffset];
+    VFilter = vp8_bilinear_filters[yoffset];

    // First filter 1d Horizontal
-    vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
+    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);

    // Now filter Verticaly
-    vp8e_filter_block2d_bil_second_pass(FData3, temp2, 4,  4,  4,  4, VFilter);
+    var_filter_block2d_bil_second_pass(FData3, temp2, 4,  4,  4,  4, VFilter);

    return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
 }
@@ -425,13 +354,13 @@ unsigned int vp8_sub_pixel_variance8x8_c
 {
    unsigned short FData3[9*8]; // Temp data bufffer used in filtering
    unsigned char  temp2[20*16];
-    const int *HFilter, *VFilter;
+    const short *HFilter, *VFilter;

-    HFilter = vp8_bilinear_taps[xoffset];
-    VFilter = vp8_bilinear_taps[yoffset];
+    HFilter = vp8_bilinear_filters[xoffset];
+    VFilter = vp8_bilinear_filters[yoffset];

-    vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
-    vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
+    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
+    var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);

    return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
 }
@@ -449,13 +378,13 @@ unsigned int vp8_sub_pixel_variance16x16_c
 {
    unsigned short FData3[17*16];   // Temp data bufffer used in filtering
    unsigned char  temp2[20*16];
-    const int *HFilter, *VFilter;
+    const short *HFilter, *VFilter;

-    HFilter = vp8_bilinear_taps[xoffset];
-    VFilter = vp8_bilinear_taps[yoffset];
+    HFilter = vp8_bilinear_filters[xoffset];
+    VFilter = vp8_bilinear_filters[yoffset];

-    vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
-    vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
+    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
+    var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);

    return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
 }
@@ -525,13 +454,13 @@ unsigned int vp8_sub_pixel_variance16x8_c
 {
    unsigned short FData3[16*9];    // Temp data bufffer used in filtering
    unsigned char  temp2[20*16];
-    const int *HFilter, *VFilter;
+    const short *HFilter, *VFilter;

-    HFilter = vp8_bilinear_taps[xoffset];
-    VFilter = vp8_bilinear_taps[yoffset];
+    HFilter = vp8_bilinear_filters[xoffset];
+    VFilter = vp8_bilinear_filters[yoffset];

-    vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
-    vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
+    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
+    var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);

    return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
 }
@@ -549,15 +478,15 @@ unsigned int vp8_sub_pixel_variance8x16_c
 {
    unsigned short FData3[9*16];    // Temp data bufffer used in filtering
    unsigned char  temp2[20*16];
-    const int *HFilter, *VFilter;
+    const short *HFilter, *VFilter;


-    HFilter = vp8_bilinear_taps[xoffset];
-    VFilter = vp8_bilinear_taps[yoffset];
+    HFilter = vp8_bilinear_filters[xoffset];
+    VFilter = vp8_bilinear_filters[yoffset];


-    vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
-    vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
+    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
+    var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);

    return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
 }
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -53,13 +53,6 @@ extern unsigned int vp8_get4x4var_mmx
    unsigned int *SSE,
    int *Sum
 );
-extern unsigned int vp8_get4x4sse_cs_mmx
-(
-    const unsigned char *src_ptr,
-    int  source_stride,
-    const unsigned char *ref_ptr,
-    int  recon_stride
-);
 extern void vp8_filter_block2d_bil4x4_var_mmx
 (
    const unsigned char *ref_ptr,
@@ -92,39 +85,6 @@ extern unsigned int vp8_get16x16pred_error_mmx
 );


-void vp8_test_get_mb_ss(void)
-{
-    short zz[] =
-    {
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -2, -2, -2, -2, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, 2, 2,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -3, -3, -3, -3, 3, 3, 3, 3, -3, -3, -3, -3, 3, 3, 3, 3,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
-    };
-    int s = 0, x = vp8_get_mb_ss_mmx(zz);
-    {
-        int y;
-
-        for (y = 0; y < 256; y++)
-            s += (zz[y] * zz[y]);
-    }
-
-    x += 0;
-}
-
-
 unsigned int vp8_get16x16var_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -16,7 +16,7 @@


 #if HAVE_MMX
-void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
+static void short_fdct8x4_mmx(short *input, short *output, int pitch)
 {
    vp8_short_fdct4x4_mmx(input,   output,    pitch);
    vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
@@ -26,7 +26,7 @@ int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
                                 short *qcoeff_ptr, short *dequant_ptr,
                                 short *scan_mask, short *round_ptr,
                                 short *quant_ptr, short *dqcoeff_ptr);
-void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
+static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
 {
    short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
    short *coeff_ptr   = b->coeff;
@@ -51,7 +51,7 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
 }

 int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
+static int mbblock_error_mmx(MACROBLOCK *mb, int dc)
 {
    short *coeff_ptr =  mb->block[0].coeff;
    short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff;
@@ -59,7 +59,7 @@ int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
 }

 int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
-int vp8_mbuverror_mmx(MACROBLOCK *mb)
+static int mbuverror_mmx(MACROBLOCK *mb)
 {
    short *s_ptr = &mb->coeff[256];
    short *d_ptr = &mb->e_mbd.dqcoeff[256];
@@ -69,7 +69,7 @@ int vp8_mbuverror_mmx(MACROBLOCK *mb)
 void vp8_subtract_b_mmx_impl(unsigned char *z,  int src_stride,
                             short *diff, unsigned char *predictor,
                             int pitch);
-void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
+static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
 {
    unsigned char *z = *(be->base_src) + be->src;
    unsigned int  src_stride = be->src_stride;
@@ -85,7 +85,7 @@ int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
                                 short *qcoeff_ptr, short *dequant_ptr,
                                 const short *inv_scan_order, short *round_ptr,
                                 short *quant_ptr, short *dqcoeff_ptr);
-void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
+static void fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
 {
    short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
    short *coeff_ptr   = b->coeff;
@@ -115,7 +115,7 @@ int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
                                     short *zbin_boost_ptr,
                                     short *quant_shift_ptr);

-void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
+static void regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
 {
    d->eob = vp8_regular_quantize_b_impl_sse2(b->coeff,
                                              b->zbin,
@@ -131,7 +131,7 @@ void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
 }

 int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
+static int mbblock_error_xmm(MACROBLOCK *mb, int dc)
 {
    short *coeff_ptr =  mb->block[0].coeff;
    short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff;
@@ -139,7 +139,7 @@ int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
 }

 int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
-int vp8_mbuverror_xmm(MACROBLOCK *mb)
+static int mbuverror_xmm(MACROBLOCK *mb)
 {
    short *s_ptr = &mb->coeff[256];
    short *d_ptr = &mb->e_mbd.dqcoeff[256];
@@ -149,7 +149,7 @@ int vp8_mbuverror_xmm(MACROBLOCK *mb)
 void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
                             short *diff, unsigned char *predictor,
                             int pitch);
-void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
+static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
 {
    unsigned char *z = *(be->base_src) + be->src;
    unsigned int  src_stride = be->src_stride;
@@ -165,7 +165,7 @@ int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
                                 short *qcoeff_ptr, short *dequant_ptr,
                                 short *round_ptr,
                                 short *quant_ptr, short *dqcoeff_ptr);
-void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
+static void fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
 {
    d->eob = vp8_fast_quantize_b_impl_ssse3(
                    b->coeff,
@@ -251,20 +251,20 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx;

        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx;
-        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_mmx;
+        cpi->rtcd.fdct.short8x4                  = short_fdct8x4_mmx;
        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_mmx;
-        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_mmx;
+        cpi->rtcd.fdct.fast8x4                   = short_fdct8x4_mmx;

        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;

        cpi->rtcd.encodemb.berr                  = vp8_block_error_mmx;
-        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_mmx;
-        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_mmx;
-        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_mmx;
+        cpi->rtcd.encodemb.mberr                 = mbblock_error_mmx;
+        cpi->rtcd.encodemb.mbuverr               = mbuverror_mmx;
+        cpi->rtcd.encodemb.subb                  = subtract_b_mmx;
        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx;
        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx;

-        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/
+        /*cpi->rtcd.quantize.fastquantb            = fast_quantize_b_mmx;*/
    }
 #endif

@@ -311,16 +311,16 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2 ;

        cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
-        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_xmm;
-        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
-        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_sse2;
+        cpi->rtcd.encodemb.mberr                 = mbblock_error_xmm;
+        cpi->rtcd.encodemb.mbuverr               = mbuverror_xmm;
+        cpi->rtcd.encodemb.subb                  = subtract_b_sse2;
        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_sse2;
        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_sse2;

 #if ARCH_X86
-        cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b_sse2;
+        cpi->rtcd.quantize.quantb                = regular_quantize_b_sse2;
 #endif
-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse2;
+        cpi->rtcd.quantize.fastquantb            = fast_quantize_b_sse2;

 #if !(CONFIG_REALTIME_ONLY)
        cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_sse2;
@@ -358,7 +358,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_ssse3;
        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_ssse3;

-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_ssse3;
+        cpi->rtcd.quantize.fastquantb            = fast_quantize_b_ssse3;

 #if CONFIG_PSNR
 #if ARCH_X86_64
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -69,7 +69,7 @@ VP8_COMMON_SRCS-yes += common/reconintra.c
 VP8_COMMON_SRCS-yes += common/reconintra4x4.c
 VP8_COMMON_SRCS-yes += common/setupintrarecon.c
 VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
-VP8_COMMON_SRCS-yes += common/textblit.c
+VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
 VP8_COMMON_SRCS-yes += common/treecoder.c

 VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -19,6 +19,7 @@ VP8_CX_SRCS-$(ARCH_ARM)  += encoder/asm_enc_offsets.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/encodemb_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/quantize_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/picklpf_arm.c
+VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/dct_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/variance_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/variance_arm.h
 VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
@@ -34,6 +35,7 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_partitions_ar

 #File list for armv6
 # encoder
+VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_fast_fdct4x4_armv6$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM)
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -36,6 +36,8 @@ vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t      *ctx,
        res = VPX_CODEC_INCAPABLE;
    else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC))
        res = VPX_CODEC_INCAPABLE;
+    else if (!(iface->caps & VPX_CODEC_CAP_DECODER))
+        res = VPX_CODEC_INCAPABLE;
    else
    {
        memset(ctx, 0, sizeof(*ctx));
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -88,24 +88,3 @@ vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int

    return 0;
 }
-
-/****************************************************************************
- *
- ****************************************************************************/
-int
-vp8_yv12_black_frame_buffer(YV12_BUFFER_CONFIG *ybf)
-{
-    if (ybf)
-    {
-        if (ybf->buffer_alloc)
-        {
-            duck_memset(ybf->y_buffer, 0x0, ybf->y_stride * ybf->y_height);
-            duck_memset(ybf->u_buffer, 0x80, ybf->uv_stride * ybf->uv_height);
-            duck_memset(ybf->v_buffer, 0x80, ybf->uv_stride * ybf->uv_height);
-        }
-
-        return 0;
-    }
-
-    return -1;
-}
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -145,8 +145,8 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
 }


-void
-vp8_yv12_extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf)
+static void
+extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf)
 {
    int i;
    unsigned char *src_ptr1, *src_ptr2;
@@ -276,5 +276,5 @@ vp8_yv12_copy_frame_yonly(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_y
        dest   += dst_ybc->y_stride;
    }

-    vp8_yv12_extend_frame_borders_yonly(dst_ybc);
+    extend_frame_borders_yonly(dst_ybc);
 }
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -63,7 +63,6 @@ extern "C"

    int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);
    int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf);
-    int vp8_yv12_black_frame_buffer(YV12_BUFFER_CONFIG *ybf);

 #ifdef __cplusplus
 }
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -924,8 +924,14 @@ static const arg_def_t resize_up_thresh   = ARG_DEF(NULL, "resize-up", 1,
        "Upscale threshold (buf %)");
 static const arg_def_t resize_down_thresh = ARG_DEF(NULL, "resize-down", 1,
        "Downscale threshold (buf %)");
-static const arg_def_t end_usage          = ARG_DEF(NULL, "end-usage", 1,
-        "VBR=0 | CBR=1 | CQ=2");
+static const struct arg_enum_list end_usage_enum[] = {
+    {"vbr", VPX_VBR},
+    {"cbr", VPX_CBR},
+    {"cq",  VPX_CQ},
+    {NULL, 0}
+};
+static const arg_def_t end_usage          = ARG_DEF_ENUM(NULL, "end-usage", 1,
+        "Rate control mode", end_usage_enum);
 static const arg_def_t target_bitrate     = ARG_DEF(NULL, "target-bitrate", 1,
        "Bitrate (kbps)");
 static const arg_def_t min_quantizer      = ARG_DEF(NULL, "min-q", 1,
@@ -1256,7 +1262,7 @@ int main(int argc, const char **argv_)
        else if (arg_match(&arg, &resize_down_thresh, argi))
            cfg.rc_resize_down_thresh = arg_parse_uint(&arg);
        else if (arg_match(&arg, &end_usage, argi))
-            cfg.rc_end_usage = arg_parse_uint(&arg);
+            cfg.rc_end_usage = arg_parse_enum_or_int(&arg);
        else if (arg_match(&arg, &target_bitrate, argi))
            cfg.rc_target_bitrate = arg_parse_uint(&arg);
        else if (arg_match(&arg, &min_quantizer, argi))
Author	SHA1	Message	Date
John Koleszar	744a58bc1c	vpx_codec_dec_init: check that the iface is a decoder Make sure the given interface is actually a decoder interface before initializing it. Change-Id: Ie48d737f2956cc2f0891666de5ea87251e96bc49	2011-03-24 15:05:10 +02:00
John Koleszar	86b5556f5a	Remove unused vp8_get4x4sse_cs_mmx declaration This declaration did not match the prototype_sad() prototype, but was unused in this translation unit, so it is removed instead. Fixes issue 290. Change-Id: I168854f88a85f73ca9aaf61d1e5dc0f43fc3fdb3	2011-03-24 15:05:10 +02:00
John Koleszar	4375b4ac39	Allow specifying --end-usage by enum name Map an enum to the --end-usage values, so you can specify --end-usage=cq instead of --end-usage=2. The numerical values still work for historical scripts, etc, but this is more user friendly. Change-Id: I445ecd9638f801f5924a71eabf449bee293cdd34	2011-03-24 15:05:10 +02:00
Tero Rintaluoma	71595edd47	ARMv6 optimized fdct4x4 Optimized fdct4x4 (8x4) for ARMv6 instruction set. - No interlocks in Cortex-A8 pipeline - One interlock cycle in ARM11 pipeline - About 2.16 times faster than current C-code compiled with -O3 Change-Id: I60484ecd144365da45bb68a960d30196b59952b8	2011-03-24 15:05:10 +02:00
Attila Nagy	848dddee15	Fix multithreaded encoding for 1 MB wide frame Thread synchronization was not correct when frame width was 1 MB. Number of allocated encoding threads is limited by the sync_range. There is no point having more because each thread lags sync_range MBs behind the thread processing the row above. http://code.google.com/p/webm/issues/detail?id=302 Change-Id: Icaf67a883beecc5ebf2f11e9be47b6997fdf6f26	2011-03-24 15:05:09 +02:00
John Koleszar	f1ba70e199	Increase static linkage, remove unused functions A large number of functions were defined with external linkage, even though they were only used from within one file. This patch changes their linkage to static and removes the vp8_ prefix from their names, which should make it more obvious to the reader that the function is contained within the current translation unit. Functions that were not referenced were removed. These symbols were identified by: $ nm -A libvpx.a | sort -k3 | uniq -c -f2 | grep ' [A-Z] ' | sort | grep '^ *1 ' Change-Id: I59609f58ab65312012c047036ae1e0634f795779	2011-03-24 15:05:09 +02:00
Attila Nagy	a22df2e29d	use semaphore for partition thread synch Change-Id: If368371097d93614ae497d99be2d39c7b0eb5f47	2011-03-18 13:25:51 +02:00