Merge remote branch 'origin/master' into experimental

Change-Id: Ice13978071e98a88cf8ae5c069c6423d74425dea
2011-03-15 00:05:07 -04:00 · 2011-03-15 00:05:07 -04:00 · 54c59a03f3
commit 54c59a03f3
parent eeb8c8004e 8c48c943e7
8 changed files with 174 additions and 40 deletions
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@ -918,15 +918,23 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
                char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
                strncpy(name, ptr, 8);
                //log_msg("COFF: Parsing symbol %s\n",name);
-                /* +1 to avoid printing leading underscore */
-                printf("%-40s EQU ", name + 1);
+                /* The 64bit Windows compiler doesn't prefix with an _.
+                 * Check what's there, and bump if necessary
+                 */
+                if (name[0] == '_')
+                    printf("%-40s EQU ", name + 1);
+                else
+                    printf("%-40s EQU ", name);
            }
            else
            {
                //log_msg("COFF: Parsing symbol %s\n",
                //        buf + strtab_ptr + get_le32(ptr+4));
-                /* +1 to avoid printing leading underscore */
-                printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4) + 1);
+                if ((buf + strtab_ptr + get_le32(ptr + 4))[0] == '_')
+                    printf("%-40s EQU ",
+                           buf + strtab_ptr + get_le32(ptr + 4) + 1);
+                else
+                    printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
            }

            if (!(strcmp(sectionlist[section-1], ".bss")))
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@ -296,18 +296,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                            }
                        }

-                        if(pbi->common.filter_level)
-                        {
-                            /*update loopfilter info*/
-                            Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
-                            filter_level = pbi->mt_baseline_filter_level[Segment];
-                            /* Distance of Mb to the various image edges.
-                             * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
-                             * Apply any context driven MB level adjustment
-                             */
-                            filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
-                        }
-
                        /* Distance of Mb to the various image edges.
                         * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
                         */
@ -362,7 +350,16 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                                }
                            }

-                          /* loopfilter on this macroblock. */
+                            /* update loopfilter info */
+                            Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+                            filter_level = pbi->mt_baseline_filter_level[Segment];
+                            /* Distance of Mb to the various image edges.
+                             * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                             * Apply any context driven MB level adjustment
+                             */
+                            filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
+
+                            /* loopfilter on this macroblock. */
                            if (filter_level)
                            {
                                if (mb_col > 0)
@ -778,18 +775,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                    }
                }

-                if(pbi->common.filter_level)
-                {
-                    /* update loopfilter info */
-                    Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
-                    filter_level = pbi->mt_baseline_filter_level[Segment];
-                    /* Distance of Mb to the various image edges.
-                     * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
-                     * Apply any context driven MB level adjustment
-                     */
-                    filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
-                }
-
                /* Distance of Mb to the various image edges.
                 * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
                 */
@ -853,6 +838,15 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                        }
                    }

+                    /* update loopfilter info */
+                    Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+                    filter_level = pbi->mt_baseline_filter_level[Segment];
+                    /* Distance of Mb to the various image edges.
+                     * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                     * Apply any context driven MB level adjustment
+                     */
+                    filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
+
                    /* loopfilter on this macroblock. */
                    if (filter_level)
                    {
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@ -50,8 +50,8 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_armv6;
        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_armv6;

-        /*cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
-        cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
+        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_armv6;
+        /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/

        /*cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
--- a/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
@ -0,0 +1,133 @@
+;
+;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_mse16x16_armv6|
+
+    ARM
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0    unsigned char *src_ptr
+; r1    int source_stride
+; r2    unsigned char *ref_ptr
+; r3    int  recon_stride
+; stack unsigned int *sse
+;
+;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
+;      So, we can remove this part of calculation.
+
+|vp8_mse16x16_armv6| PROC
+
+    push    {r4-r9, lr}
+    mov     r12, #16            ; set loop counter to 16 (=block height)
+
+    mov     r4, #0              ; initialize sse = 0
+
+loop
+    ; 1st 4 pixels
+    ldr     r5, [r0, #0x0]      ; load 4 src pixels
+    ldr     r6, [r2, #0x0]      ; load 4 ref pixels
+
+    mov     lr, #0              ; constant zero
+
+    usub8   r8, r5, r6          ; calculate difference
+    sel     r7, r8, lr          ; select bytes with positive difference
+    usub8   r9, r6, r5          ; calculate difference with reversed operands
+    sel     r8, r9, lr          ; select bytes with negative difference
+
+    ; calculate partial sums
+    usad8   r5, r7, lr          ; calculate sum of positive differences
+    usad8   r6, r8, lr          ; calculate sum of negative differences
+    orr     r8, r8, r7          ; differences of all 4 pixels
+
+    ldr     r5, [r0, #0x4]      ; load 4 src pixels
+
+    ; calculate sse
+    uxtb16  r6, r8              ; byte (two pixels) to halfwords
+    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
+    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
+
+    ; 2nd 4 pixels
+    ldr     r6, [r2, #0x4]      ; load 4 ref pixels
+    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
+
+    usub8   r8, r5, r6          ; calculate difference
+    sel     r7, r8, lr          ; select bytes with positive difference
+    usub8   r9, r6, r5          ; calculate difference with reversed operands
+    sel     r8, r9, lr          ; select bytes with negative difference
+
+    ; calculate partial sums
+    usad8   r5, r7, lr          ; calculate sum of positive differences
+    usad8   r6, r8, lr          ; calculate sum of negative differences
+    orr     r8, r8, r7          ; differences of all 4 pixels
+    ldr     r5, [r0, #0x8]      ; load 4 src pixels
+    ; calculate sse
+    uxtb16  r6, r8              ; byte (two pixels) to halfwords
+    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
+    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
+
+    ; 3rd 4 pixels
+    ldr     r6, [r2, #0x8]      ; load 4 ref pixels
+    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
+
+    usub8   r8, r5, r6          ; calculate difference
+    sel     r7, r8, lr          ; select bytes with positive difference
+    usub8   r9, r6, r5          ; calculate difference with reversed operands
+    sel     r8, r9, lr          ; select bytes with negative difference
+
+    ; calculate partial sums
+    usad8   r5, r7, lr          ; calculate sum of positive differences
+    usad8   r6, r8, lr          ; calculate sum of negative differences
+    orr     r8, r8, r7          ; differences of all 4 pixels
+
+    ldr     r5, [r0, #0xc]      ; load 4 src pixels
+
+    ; calculate sse
+    uxtb16  r6, r8              ; byte (two pixels) to halfwords
+    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
+    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
+
+    ; 4th 4 pixels
+    ldr     r6, [r2, #0xc]      ; load 4 ref pixels
+    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
+
+    usub8   r8, r5, r6          ; calculate difference
+    add     r0, r0, r1          ; set src_ptr to next row
+    sel     r7, r8, lr          ; select bytes with positive difference
+    usub8   r9, r6, r5          ; calculate difference with reversed operands
+    add     r2, r2, r3          ; set dst_ptr to next row
+    sel     r8, r9, lr          ; select bytes with negative difference
+
+    ; calculate partial sums
+    usad8   r5, r7, lr          ; calculate sum of positive differences
+    usad8   r6, r8, lr          ; calculate sum of negative differences
+    orr     r8, r8, r7          ; differences of all 4 pixels
+
+    subs    r12, r12, #1        ; next row
+
+    ; calculate sse
+    uxtb16  r6, r8              ; byte (two pixels) to halfwords
+    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
+    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
+    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
+
+    bne     loop
+
+    ; return stuff
+    ldr     r1, [sp, #28]       ; get address of sse
+    mov     r0, r4              ; return sse
+    str     r4, [r1]            ; store sse
+
+    pop     {r4-r9, pc}
+
+    ENDP
+
+    END
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@ -20,6 +20,7 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
 extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6);
 extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6);
 extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
+extern prototype_variance(vp8_mse16x16_armv6);

 #if !CONFIG_RUNTIME_CPU_DETECT

@ -32,6 +33,9 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
 #undef  vp8_variance_var16x16
 #define vp8_variance_var16x16 vp8_variance16x16_armv6

+#undef  vp8_variance_mse16x16
+#define vp8_variance_mse16x16 vp8_mse16x16_armv6
+
 #undef  vp8_variance_halfpixvar16x16_h
 #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6

--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@ -37,6 +37,7 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_partitions_ar
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/walsh_v6$(ASM)

 #File list for neon
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@ -332,7 +332,7 @@ typedef struct vpx_codec_priv_cb_pair
 * extended in one of two ways. First, a second, algorithm specific structure
 * can be allocated and the priv member pointed to it. Alternatively, this
 * structure can be made the first member of the algorithm specific structure,
- * and the pointer casted to the proper type.
+ * and the pointer cast to the proper type.
 */
 struct vpx_codec_priv
 {
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@ -168,15 +168,10 @@
    %macro GET_GOT 1
      push %1
      call %%get_got
-      %%sub_offset:
-      jmp  %%exitGG
      %%get_got:
-      mov  %1, [esp]
-      add %1, fake_got - %%sub_offset
-      ret
-      %%exitGG:
+      pop  %1
      %undef GLOBAL
-      %define GLOBAL(x) x + %1 - fake_got
+      %define GLOBAL(x) x + %1 - %%get_got
      %undef RESTORE_GOT
      %define RESTORE_GOT pop %1
    %endmacro
@ -289,7 +284,6 @@
 %elifidn __OUTPUT_FORMAT__,macho32
 %macro SECTION_RODATA 0
 section .text
-fake_got:
 %endmacro
 %else
 %define SECTION_RODATA section .rodata