am 6d4e8c1e: am 003be710: Merge "Clean up some ARMv4/ARMv5 cruft."

* commit '6d4e8c1ea03f332dd86763995d6219ea50a3bce6': Clean up some ARMv4/ARMv5 cruft.
2013-12-13 23:51:29 +00:00
parent b77ef8dc81 6d4e8c1ea0
commit 56f7c97e07
10 changed files with 51 additions and 207 deletions
--- a/libc/arch-arm/bionic/memcmp16.S
+++ b/libc/arch-arm/bionic/memcmp16.S
@@ -32,15 +32,15 @@
 /*
 * Optimized memcmp16() for ARM9.
 * This would not be optimal on XScale or ARM11, where more prefetching
- * and use of PLD will be needed.
+ * and use of pld will be needed.
 * The 2 major optimizations here are
 * (1) The main loop compares 16 bytes at a time
 * (2) The loads are scheduled in a way they won't stall
 */
 ENTRY(__memcmp16)
-        PLD         (r0, #0)
+        pld         [r0, #0]
-        PLD         (r1, #0)
+        pld         [r1, #0]
        /* take care of the case where length is zero or the buffers are the same */
        cmp         r0, r1
@@ -62,13 +62,13 @@ ENTRY(__memcmp16)
        bpl         0f
        /* small blocks (less than 12 words) */
-        PLD         (r0, #32)
+        pld         [r0, #32]
-        PLD         (r1, #32)
+        pld         [r1, #32]
 1:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip
-        bxne        lr        
+        bxne        lr
        subs        r2, r2, #1
        bne         1b
        bx          lr
@@ -79,11 +79,11 @@ ENTRY(__memcmp16)
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r4, 0
        .cfi_rel_offset lr, 4
-        
+
        /* align first pointer to word boundary */
        tst         r3, #2
        beq         0f
-        
+
        ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        sub         r2, r2, #1
@@ -111,10 +111,10 @@ ENTRY(__memcmp16)
        ldr         ip, [r1]
        subs        r2, r2, #(16 + 2)
        bmi         1f
-        
+
 0:
-        PLD         (r3, #64)
+        pld         [r3, #64]
-        PLD         (r1, #64)
+        pld         [r1, #64]
        ldr         r0, [r3], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
@@ -139,14 +139,14 @@ ENTRY(__memcmp16)
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
-        bne         2f        
+        bne         2f
        subs        r2, r2, #16
        bhs         0b
        /* do we have at least 2 words left? */
 1:      adds        r2, r2, #(16 - 2 + 2)
        bmi         4f
-        
+
        /* finish off 2 words at a time */
 3:      ldr         r0, [r3], #4
        ldr         ip, [r1], #4
@@ -195,8 +195,8 @@ ENTRY(__memcmp16)
        sub         r2, r2, #8
 6:
-        PLD         (r3, #64)
+        pld         [r3, #64]
-        PLD         (r1, #64)
+        pld         [r1, #64]
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r3], #4
--- a/libc/arch-arm/bionic/memcpy.S
+++ b/libc/arch-arm/bionic/memcpy.S
@@ -352,9 +352,9 @@ ENTRY(memcpy)
        // preload the destination because we'll align it to a cache line
        // with small writes. Also start the source "pump".
-        PLD         (r0, #0)
+        pld         [r0, #0]
-        PLD         (r1, #0)
+        pld         [r1, #0]
-        PLD         (r1, #32)
+        pld         [r1, #32]
 		/* it simplifies things to take care of len<4 early */
 		cmp			r2, #4
@@ -442,7 +442,7 @@ cached_aligned32:
        add         r12, r12, #64
 1:      ldmia       r1!, { r4-r11 }
-        PLD         (r12, #64)
+        pld         [r12, #64]
        subs        r2, r2, #32
        // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
@@ -563,7 +563,7 @@ loop16:
        ldr         r12, [r1], #4
 1:      mov         r4, r12
 		ldmia		r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
        subs        r2, r2, #32
        ldrhs       r12, [r1], #4
 		orr			r3, r3, r4,		lsl #16
@@ -590,7 +590,7 @@ loop8:
        ldr         r12, [r1], #4
 1:      mov         r4, r12
 		ldmia		r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
 		subs		r2, r2, #32
        ldrhs       r12, [r1], #4
 		orr			r3, r3, r4,		lsl #24
@@ -617,7 +617,7 @@ loop24:
        ldr         r12, [r1], #4
 1:      mov         r4, r12
 		ldmia		r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
 		subs		r2, r2, #32
        ldrhs       r12, [r1], #4
 		orr			r3, r3, r4,		lsl #8
--- a/libc/arch-arm/bionic/strcmp.S
+++ b/libc/arch-arm/bionic/strcmp.S
@@ -52,8 +52,8 @@
 #define magic2(REG) REG, lsl #7
 ENTRY(strcmp)
-	PLD(r0, #0)
+	pld	[r0, #0]
-	PLD(r1, #0)
+	pld	[r1, #0]
 	eor	r2, r0, r1
 	tst	r2, #3
@@ -88,8 +88,8 @@ ENTRY(strcmp)
 	orr	r4, r4, r4, lsl #16
 	.p2align	2
 4:
-	PLD(r0, #8)
+	pld	[r0, #8]
-	PLD(r1, #8)
+	pld	[r1, #8]
 	sub	r2, ip, magic1(r4)
 	cmp	ip, r3
 	itttt	eq
--- a/libc/arch-arm/generic/bionic/memcpy.S
+++ b/libc/arch-arm/generic/bionic/memcpy.S
@@ -57,9 +57,9 @@ ENTRY(memcpy)
        // preload the destination because we'll align it to a cache line
        // with small writes. Also start the source "pump".
-        PLD         (r0, #0)
+        pld         [r0, #0]
-        PLD         (r1, #0)
+        pld         [r1, #0]
-        PLD         (r1, #32)
+        pld         [r1, #32]
        /* it simplifies things to take care of len<4 early */
        cmp         r2, #4
@@ -147,7 +147,7 @@ cached_aligned32:
        add         r12, r12, #64
 1:      ldmia       r1!, { r4-r11 }
-        PLD         (r12, #64)
+        pld         [r12, #64]
        subs        r2, r2, #32
        // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
@@ -268,7 +268,7 @@ loop16:
        ldr         r12, [r1], #4
 1:      mov         r4, r12
        ldmia       r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
        subs        r2, r2, #32
        ldrhs       r12, [r1], #4
        orr         r3, r3, r4,     lsl #16
@@ -295,7 +295,7 @@ loop8:
        ldr         r12, [r1], #4
 1:      mov         r4, r12
        ldmia       r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
        subs        r2, r2, #32
        ldrhs       r12, [r1], #4
        orr         r3, r3, r4,     lsl #24
@@ -322,7 +322,7 @@ loop24:
        ldr         r12, [r1], #4
 1:      mov         r4, r12
        ldmia       r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
        subs        r2, r2, #32
        ldrhs       r12, [r1], #4
        orr         r3, r3, r4,     lsl #8
--- a/libc/arch-arm/generic/bionic/strcmp.S
+++ b/libc/arch-arm/generic/bionic/strcmp.S
@@ -52,8 +52,8 @@
 #define magic2(REG) REG, lsl #7
 ENTRY(strcmp)
-	PLD(r0, #0)
+	pld	[r0, #0]
-	PLD(r1, #0)
+	pld	[r1, #0]
 	eor	r2, r0, r1
 	tst	r2, #3
@@ -88,8 +88,8 @@ ENTRY(strcmp)
 	orr	r4, r4, r4, lsl #16
 	.p2align	2
 4:
-	PLD(r0, #8)
+	pld	[r0, #8]
-	PLD(r1, #8)
+	pld	[r1, #8]
 	sub	r2, ip, magic1(r4)
 	cmp	ip, r3
 	itttt	eq
--- a/libc/arch-arm/generic/bionic/strcpy.S
+++ b/libc/arch-arm/generic/bionic/strcpy.S
@@ -33,7 +33,7 @@
 #include <machine/asm.h>
 ENTRY(strcpy)
-	PLD(r1, #0)
+	pld	[r1, #0]
 	eor	r2, r0, r1
 	mov	ip, r0
 	tst	r2, #3
@@ -62,7 +62,7 @@ ENTRY(strcpy)
 	  load stalls.  */
 	.p2align 2
 2:
-	PLD(r1, #8)
+	pld	[r1, #8]
 	ldr	r4, [r1], #4
 	sub	r2, r3, r5
 	bics	r2, r2, r3
--- a/libc/arch-arm/generic/bionic/strlen.c
+++ b/libc/arch-arm/generic/bionic/strlen.c
@@ -63,9 +63,7 @@ size_t strlen(const char *s)
        "ldr     %[v], [%[s]], #4           \n"
        "sub     %[l], %[l], %[s]           \n"
        "0:                                 \n"
 #if __ARM_HAVE_PLD
        "pld     [%[s], #64]                \n"
 #endif
        "sub     %[t], %[v], %[mask], lsr #7\n"
        "and     %[t], %[t], %[mask]        \n"
        "bics    %[t], %[t], %[v]           \n"
--- a/libc/arch-arm/include/machine/cpu-features.h
+++ b/libc/arch-arm/include/machine/cpu-features.h
@@ -34,133 +34,29 @@
 *
 * This is done to abstract us from the various ARM Architecture
 * quirks and alphabet soup.
 *
 * IMPORTANT: We have no intention to support anything below an ARMv4T !
 */
 /* __ARM_ARCH__ is a number corresponding to the ARM revision
- * we're going to support
+ * we're going to support. Our toolchain doesn't define __ARM_ARCH__
 *
 * it looks like our toolchain doesn't define __ARM_ARCH__
 * so try to guess it.
 *
 *
 *
 */
 #ifndef __ARM_ARCH__
 #  if defined __ARM_ARCH_7__   || defined __ARM_ARCH_7A__ || \
-      defined __ARM_ARCH_7R__  || defined __ARM_ARCH_7M__
+        defined __ARM_ARCH_7R__  || defined __ARM_ARCH_7M__
 #    define __ARM_ARCH__ 7
 #  elif defined __ARM_ARCH_6__   || defined __ARM_ARCH_6J__ || \
-      defined __ARM_ARCH_6K__  || defined __ARM_ARCH_6Z__ || \
+        defined __ARM_ARCH_6K__  || defined __ARM_ARCH_6Z__ || \
-      defined __ARM_ARCH_6KZ__ || defined __ARM_ARCH_6T2__
+        defined __ARM_ARCH_6KZ__ || defined __ARM_ARCH_6T2__
 #
 #    define __ARM_ARCH__ 6
 #
 #  elif defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || \
        defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__
 #
 #    define __ARM_ARCH__ 5
 #
 #  elif defined __ARM_ARCH_4T__
 #
 #    define __ARM_ARCH__ 4
 #
 #  elif defined __ARM_ARCH_4__
 #    error ARMv4 is not supported, please use ARMv4T at a minimum
 #  else
 #    error Unknown or unsupported ARM architecture
 #  endif
 #endif
 /* experimental feature used to check that our ARMv4 workarounds
 * work correctly without a real ARMv4 machine */
 #ifdef BIONIC_EXPERIMENTAL_FORCE_ARMV4
 #  undef  __ARM_ARCH__
 #  define __ARM_ARCH__  4
 #endif
 /* define __ARM_HAVE_5TE if we have the ARMv5TE instructions */
 #if __ARM_ARCH__ > 5
 #  define  __ARM_HAVE_5TE  1
 #elif __ARM_ARCH__ == 5
 #  if defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__
 #    define __ARM_HAVE_5TE  1
 #  endif
 #endif
 /* instructions introduced in ARMv5 */
 #if __ARM_ARCH__ >= 5
 #  define  __ARM_HAVE_BLX  1
 #  define  __ARM_HAVE_CLZ  1
 #  define  __ARM_HAVE_LDC2 1
 #  define  __ARM_HAVE_MCR2 1
 #  define  __ARM_HAVE_MRC2 1
 #  define  __ARM_HAVE_STC2 1
 #endif
 /* ARMv5TE introduces a few instructions */
 #if __ARM_HAVE_5TE
 #  define  __ARM_HAVE_PLD   1
 #  define  __ARM_HAVE_MCRR  1
 #  define  __ARM_HAVE_MRRC  1
 #endif
 /* define __ARM_HAVE_HALFWORD_MULTIPLY when half-word multiply instructions
 * are available; this means variants of: smul, smulw, smla, smlaw, smlal
 */
-#if __ARM_HAVE_5TE
+#define  __ARM_HAVE_HALFWORD_MULTIPLY  1
 #  define  __ARM_HAVE_HALFWORD_MULTIPLY  1
 #endif
 /* define __ARM_HAVE_PAIR_LOAD_STORE when 64-bit memory loads and stores
 * into/from a pair of 32-bit registers are supported through 'ldrd' and 'strd'
 */
 #if __ARM_HAVE_5TE
 #  define  __ARM_HAVE_PAIR_LOAD_STORE 1
 #endif
 /* define __ARM_HAVE_SATURATED_ARITHMETIC if you have the saturated integer
 * arithmetic instructions: qadd, qdadd, qsub, qdsub
 */
 #if __ARM_HAVE_5TE
 #  define  __ARM_HAVE_SATURATED_ARITHMETIC 1
 #endif
 /* define __ARM_HAVE_PC_INTERWORK when a direct assignment to the
 * pc register will switch into thumb/ARM mode depending on bit 0
 * of the new instruction address. Before ARMv5, this was not the
 * case, and you have to write:
 *
 *     mov  r0, [<some address>]
 *     bx   r0
 *
 * instead of:
 *
 *     ldr  pc, [<some address>]
 *
 * note that this affects any instruction that explicitly changes the
 * value of the pc register, including ldm { ...,pc } or 'add pc, #offset'
 */
 #if __ARM_ARCH__ >= 5
 #  define __ARM_HAVE_PC_INTERWORK
 #endif
 /* define __ARM_HAVE_LDREX_STREX for ARMv6 and ARMv7 architecture to be
 * used in replacement of deprecated swp instruction
 */
 #if __ARM_ARCH__ >= 6
 #  define __ARM_HAVE_LDREX_STREX
 #endif
 /* define __ARM_HAVE_DMB for ARMv7 architecture
 */
 #if __ARM_ARCH__ >= 7
 #  define __ARM_HAVE_DMB
 #endif
 /* define __ARM_HAVE_LDREXD for ARMv7 architecture
 * (also present in ARMv6K, but not implemented in ARMv7-M, neither of which
@@ -184,18 +80,4 @@
 #  define __ARM_HAVE_NEON
 #endif
 /* Assembly-only macros */
 #ifdef __ASSEMBLY__
 /* define a handy PLD(address) macro since the cache preload
 * is an optional opcode
 */
 #if __ARM_HAVE_PLD
 #  define  PLD(reg,offset)    pld    [reg, offset]
 #else
 #  define  PLD(reg,offset)    /* nothing */
 #endif
 #endif /* ! __ASSEMBLY__ */
 #endif /* _ARM_MACHINE_CPU_FEATURES_H */
--- a/libc/arch-arm/include/machine/endian.h
+++ b/libc/arch-arm/include/machine/endian.h
@@ -33,15 +33,6 @@
 #ifdef __GNUC__
 /*
 * REV and REV16 weren't available on ARM5 or ARM4.
 * We don't include <machine/cpu-features.h> because it pollutes the
 * namespace with macros like PLD.
 */
 #if !defined __ARM_ARCH_5__ && !defined __ARM_ARCH_5T__ && \
    !defined __ARM_ARCH_5TE__ && !defined __ARM_ARCH_5TEJ__ && \
    !defined __ARM_ARCH_4T__ && !defined __ARM_ARCH_4__
 /* According to RealView Assembler User's Guide, REV and REV16 are available
 * in Thumb code and 16-bit instructions when used in Thumb-2 code.
 *
@@ -55,13 +46,13 @@
 */
 #define __swap16md(x) ({                                        \
    register u_int16_t _x = (x);                                \
-    __asm volatile ("rev16 %0, %0" : "+l" (_x));                \
+    __asm__ __volatile__("rev16 %0, %0" : "+l" (_x));           \
    _x;                                                         \
 })
 #define __swap32md(x) ({                                        \
    register u_int32_t _x = (x);                                \
-    __asm volatile ("rev %0, %0" : "+l" (_x));                  \
+    __asm__ __volatile__("rev %0, %0" : "+l" (_x));             \
    _x;                                                         \
 })
@@ -74,7 +65,6 @@
 /* Tell sys/endian.h we have MD variants of the swap macros.  */
 #define MD_SWAP
 #endif  /* __ARM_ARCH__ */
 #endif  /* __GNUC__ */
 #if defined(__ARMEB__)
--- a/libc/private/bionic_atomic_arm.h
+++ b/libc/private/bionic_atomic_arm.h
@@ -82,47 +82,21 @@
 */
 #if defined(ANDROID_SMP) && ANDROID_SMP == 1
 /* Sanity check, multi-core is only supported starting from ARMv6 */
 #  if __ARM_ARCH__ < 6
 #    error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6
 #  endif
 #  ifdef __ARM_HAVE_DMB
 /* For ARMv7-A, we can use the 'dmb' instruction directly */
-__ATOMIC_INLINE__ void
+__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
 __bionic_memory_barrier(void)
 {
    /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
     * bother with __ATOMIC_SWITCH_TO_ARM */
    __asm__ __volatile__ ( "dmb" : : : "memory" );
 }
-#  else /* !__ARM_HAVE_DMB */
+
 /* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor,
 * which requires the use of a general-purpose register, which is slightly
 * less efficient.
 */
 __ATOMIC_INLINE__ void
 __bionic_memory_barrier(void)
 {
    __asm__ __volatile__ (
        __SWITCH_TO_ARM
        "mcr p15, 0, %0, c7, c10, 5"
        __SWITCH_TO_THUMB
        : : "r" (0) : __ATOMIC_CLOBBERS "memory");
 }
 #  endif /* !__ARM_HAVE_DMB */
 #else /* !ANDROID_SMP */
-__ATOMIC_INLINE__ void
+
-__bionic_memory_barrier(void)
+__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
 {
    /* A simple compiler barrier */
    __asm__ __volatile__ ( "" : : : "memory" );
 }
 #endif /* !ANDROID_SMP */
-#ifndef __ARM_HAVE_LDREX_STREX
+#endif /* !ANDROID_SMP */
 #error Only ARM devices which have LDREX / STREX are supported
 #endif
 /* Compare-and-swap, without any explicit barriers. Note that this functions
 * returns 0 on success, and 1 on failure. The opposite convention is typically