From c54ca40aef48009e7b0e5b2b3069aad62ffd3453 Mon Sep 17 00:00:00 2001
From: Elliott Hughes
Date: Fri, 13 Dec 2013 12:17:13 -0800
Subject: [PATCH] Clean up some ARMv4/ARMv5 cruft.

Change-Id: I29e836fea4b53901e29f96c6888869c35f6726be
---
 libc/arch-arm/bionic/memcmp16.S              |  30 ++---
 libc/arch-arm/bionic/memcpy.S                |  14 +-
 libc/arch-arm/bionic/strcmp.S                |   8 +-
 libc/arch-arm/generic/bionic/memcpy.S        |  14 +-
 libc/arch-arm/generic/bionic/strcmp.S        |   8 +-
 libc/arch-arm/generic/bionic/strcpy.S        |   4 +-
 libc/arch-arm/generic/bionic/strlen.c        |   2 -
 libc/arch-arm/include/machine/cpu-features.h | 128 +------------------
 libc/arch-arm/include/machine/endian.h       |  14 +-
 libc/private/bionic_atomic_arm.h             |  36 +-----
 10 files changed, 51 insertions(+), 207 deletions(-)

diff --git a/libc/arch-arm/bionic/memcmp16.S b/libc/arch-arm/bionic/memcmp16.S
index 825c94f15..afbb1b047 100644
--- a/libc/arch-arm/bionic/memcmp16.S
+++ b/libc/arch-arm/bionic/memcmp16.S
@@ -32,15 +32,15 @@
 /*
  * Optimized memcmp16() for ARM9.
  * This would not be optimal on XScale or ARM11, where more prefetching
- * and use of PLD will be needed.
+ * and use of pld will be needed.
  * The 2 major optimzations here are
  * (1) The main loop compares 16 bytes at a time
  * (2) The loads are scheduled in a way they won't stall
  */

 ENTRY(__memcmp16)
-        PLD         (r0, #0)
-        PLD         (r1, #0)
+        pld         [r0, #0]
+        pld         [r1, #0]

         /* take of the case where length is nul or the buffers are the same */
         cmp         r0, r1
@@ -62,13 +62,13 @@ ENTRY(__memcmp16)
         bpl         0f

         /* small blocks (less then 12 words) */
-        PLD         (r0, #32)
-        PLD         (r1, #32)
+        pld         [r0, #32]
+        pld         [r1, #32]
 1:      ldrh        r0, [r3], #2
         ldrh        ip, [r1], #2
         subs        r0, r0, ip
-        bxne        lr
+        bxne        lr
         subs        r2, r2, #1
         bne         1b
         bx          lr

@@ -79,11 +79,11 @@ ENTRY(__memcmp16)
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r4, 0
         .cfi_rel_offset lr, 4
-
+
         /* align first pointer to word boundary */
         tst         r3, #2
         beq         0f
-
+
         ldrh        r0, [r3], #2
         ldrh        ip, [r1], #2
         sub         r2, r2, #1
@@ -111,10 +111,10 @@ ENTRY(__memcmp16)
         ldr         ip, [r1]
         subs        r2, r2, #(16 + 2)
         bmi         1f
-
+
 0:
-        PLD         (r3, #64)
-        PLD         (r1, #64)
+        pld         [r3, #64]
+        pld         [r1, #64]
         ldr         r0, [r3], #4
         ldr         lr, [r1, #4]!
         eors        r0, r0, ip
@@ -139,14 +139,14 @@ ENTRY(__memcmp16)
         ldreq       r0, [r3], #4
         ldreq       ip, [r1, #4]!
         eoreqs      r0, r0, lr
-        bne         2f
+        bne         2f
         subs        r2, r2, #16
         bhs         0b

         /* do we have at least 2 words left? */
 1:      adds        r2, r2, #(16 - 2 + 2)
         bmi         4f
-
+
         /* finish off 2 words at a time */
 3:      ldr         r0, [r3], #4
         ldr         ip, [r1], #4
@@ -195,8 +195,8 @@ ENTRY(__memcmp16)
         sub         r2, r2, #8

 6:
-        PLD         (r3, #64)
-        PLD         (r1, #64)
+        pld         [r3, #64]
+        pld         [r1, #64]
         mov         ip, lr, lsr #16
         ldr         lr, [r1], #4
         ldr         r0, [r3], #4
diff --git a/libc/arch-arm/bionic/memcpy.S b/libc/arch-arm/bionic/memcpy.S
index 0dc86d54a..f25b3e356 100644
--- a/libc/arch-arm/bionic/memcpy.S
+++ b/libc/arch-arm/bionic/memcpy.S
@@ -352,9 +352,9 @@ ENTRY(memcpy)
         // preload the destination because we'll align it to a cache line
         // with small writes. Also start the source "pump".
-        PLD         (r0, #0)
-        PLD         (r1, #0)
-        PLD         (r1, #32)
+        pld         [r0, #0]
+        pld         [r1, #0]
+        pld         [r1, #32]

         /* it simplifies things to take care of len<4 early */
         cmp         r2, #4
@@ -442,7 +442,7 @@ cached_aligned32:
         add         r12, r12, #64

 1:      ldmia       r1!, { r4-r11 }
-        PLD         (r12, #64)
+        pld         [r12, #64]
         subs        r2, r2, #32

         // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
@@ -563,7 +563,7 @@ loop16:
         ldr         r12, [r1], #4
 1:      mov         r4, r12
         ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
         subs        r2, r2, #32
         ldrhs       r12, [r1], #4
         orr         r3, r3, r4, lsl #16
@@ -590,7 +590,7 @@ loop8:
         ldr         r12, [r1], #4
 1:      mov         r4, r12
         ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
         subs        r2, r2, #32
         ldrhs       r12, [r1], #4
         orr         r3, r3, r4, lsl #24
@@ -617,7 +617,7 @@ loop24:
         ldr         r12, [r1], #4
 1:      mov         r4, r12
         ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
         subs        r2, r2, #32
         ldrhs       r12, [r1], #4
         orr         r3, r3, r4, lsl #8
diff --git a/libc/arch-arm/bionic/strcmp.S b/libc/arch-arm/bionic/strcmp.S
index 764a531e2..42d41d143 100644
--- a/libc/arch-arm/bionic/strcmp.S
+++ b/libc/arch-arm/bionic/strcmp.S
@@ -52,8 +52,8 @@
 #define magic2(REG) REG, lsl #7

 ENTRY(strcmp)
-        PLD(r0, #0)
-        PLD(r1, #0)
+        pld     [r0, #0]
+        pld     [r1, #0]
         eor     r2, r0, r1
         tst     r2, #3

@@ -88,8 +88,8 @@ ENTRY(strcmp)
         orr     r4, r4, r4, lsl #16
         .p2align 2
 4:
-        PLD(r0, #8)
-        PLD(r1, #8)
+        pld     [r0, #8]
+        pld     [r1, #8]
         sub     r2, ip, magic1(r4)
         cmp     ip, r3
         itttt   eq
diff --git a/libc/arch-arm/generic/bionic/memcpy.S b/libc/arch-arm/generic/bionic/memcpy.S
index 87ebc44a0..699b88d7c 100644
--- a/libc/arch-arm/generic/bionic/memcpy.S
+++ b/libc/arch-arm/generic/bionic/memcpy.S
@@ -57,9 +57,9 @@ ENTRY(memcpy)
         // preload the destination because we'll align it to a cache line
         // with small writes. Also start the source "pump".
-        PLD         (r0, #0)
-        PLD         (r1, #0)
-        PLD         (r1, #32)
+        pld         [r0, #0]
+        pld         [r1, #0]
+        pld         [r1, #32]

         /* it simplifies things to take care of len<4 early */
         cmp         r2, #4
@@ -147,7 +147,7 @@ cached_aligned32:
         add         r12, r12, #64

 1:      ldmia       r1!, { r4-r11 }
-        PLD         (r12, #64)
+        pld         [r12, #64]
         subs        r2, r2, #32

         // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
@@ -268,7 +268,7 @@ loop16:
         ldr         r12, [r1], #4
 1:      mov         r4, r12
         ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
         subs        r2, r2, #32
         ldrhs       r12, [r1], #4
         orr         r3, r3, r4, lsl #16
@@ -295,7 +295,7 @@ loop8:
         ldr         r12, [r1], #4
 1:      mov         r4, r12
         ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
         subs        r2, r2, #32
         ldrhs       r12, [r1], #4
         orr         r3, r3, r4, lsl #24
@@ -322,7 +322,7 @@ loop24:
         ldr         r12, [r1], #4
 1:      mov         r4, r12
         ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
         subs        r2, r2, #32
         ldrhs       r12, [r1], #4
         orr         r3, r3, r4, lsl #8
diff --git a/libc/arch-arm/generic/bionic/strcmp.S b/libc/arch-arm/generic/bionic/strcmp.S
index 764a531e2..42d41d143 100644
--- a/libc/arch-arm/generic/bionic/strcmp.S
+++ b/libc/arch-arm/generic/bionic/strcmp.S
@@ -52,8 +52,8 @@
 #define magic2(REG) REG, lsl #7

 ENTRY(strcmp)
-        PLD(r0, #0)
-        PLD(r1, #0)
+        pld     [r0, #0]
+        pld     [r1, #0]
         eor     r2, r0, r1
         tst     r2, #3

@@ -88,8 +88,8 @@ ENTRY(strcmp)
         orr     r4, r4, r4, lsl #16
         .p2align 2
 4:
-        PLD(r0, #8)
-        PLD(r1, #8)
+        pld     [r0, #8]
+        pld     [r1, #8]
         sub     r2, ip, magic1(r4)
         cmp     ip, r3
         itttt   eq
diff --git a/libc/arch-arm/generic/bionic/strcpy.S b/libc/arch-arm/generic/bionic/strcpy.S
index 21dafda7e..cc997f448 100644
--- a/libc/arch-arm/generic/bionic/strcpy.S
+++ b/libc/arch-arm/generic/bionic/strcpy.S
@@ -33,7 +33,7 @@
 #include

 ENTRY(strcpy)
-        PLD(r1, #0)
+        pld     [r1, #0]
         eor     r2, r0, r1
         mov     ip, r0
         tst     r2, #3
@@ -62,7 +62,7 @@ ENTRY(strcpy)
    load stalls.  */
         .p2align 2
 2:
-        PLD(r1, #8)
+        pld     [r1, #8]
         ldr     r4, [r1], #4
         sub     r2, r3, r5
         bics    r2, r2, r3
diff --git a/libc/arch-arm/generic/bionic/strlen.c b/libc/arch-arm/generic/bionic/strlen.c
index 824cf78d0..811e1e0b1 100644
--- a/libc/arch-arm/generic/bionic/strlen.c
+++ b/libc/arch-arm/generic/bionic/strlen.c
@@ -63,9 +63,7 @@ size_t strlen(const char *s)
         "ldr     %[v], [%[s]], #4           \n"
         "sub     %[l], %[l], %[s]           \n"
         "0:                                 \n"
-#if __ARM_HAVE_PLD
         "pld     [%[s], #64]                \n"
-#endif
         "sub     %[t], %[v], %[mask], lsr #7\n"
         "and     %[t], %[t], %[mask]        \n"
         "bics    %[t], %[t], %[v]           \n"
diff --git a/libc/arch-arm/include/machine/cpu-features.h b/libc/arch-arm/include/machine/cpu-features.h
index 80d3fda64..fc5a8fd14 100644
--- a/libc/arch-arm/include/machine/cpu-features.h
+++ b/libc/arch-arm/include/machine/cpu-features.h
@@ -34,133 +34,29 @@
  *
  * This is done to abstract us from the various ARM Architecture
  * quirks and alphabet soup.
- *
- * IMPORTANT: We have no intention to support anything below an ARMv4T !
  */

 /* __ARM_ARCH__ is a number corresponding to the ARM revision
- * we're going to support
- *
- * it looks like our toolchain doesn't define __ARM_ARCH__
+ * we're going to support. Our toolchain doesn't define __ARM_ARCH__
  * so try to guess it.
- *
- *
- *
  */

 #ifndef __ARM_ARCH__
-
 # if defined __ARM_ARCH_7__ || defined __ARM_ARCH_7A__ || \
-     defined __ARM_ARCH_7R__ || defined __ARM_ARCH_7M__
-
+     defined __ARM_ARCH_7R__ || defined __ARM_ARCH_7M__
 #  define __ARM_ARCH__ 7
-
 # elif defined __ARM_ARCH_6__ || defined __ARM_ARCH_6J__ || \
-     defined __ARM_ARCH_6K__ || defined __ARM_ARCH_6Z__ || \
-     defined __ARM_ARCH_6KZ__ || defined __ARM_ARCH_6T2__
-#
+     defined __ARM_ARCH_6K__ || defined __ARM_ARCH_6Z__ || \
+     defined __ARM_ARCH_6KZ__ || defined __ARM_ARCH_6T2__
 #  define __ARM_ARCH__ 6
-#
-# elif defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || \
-     defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__
-#
-#  define __ARM_ARCH__ 5
-#
-# elif defined __ARM_ARCH_4T__
-#
-#  define __ARM_ARCH__ 4
-#
-# elif defined __ARM_ARCH_4__
-#  error ARMv4 is not supported, please use ARMv4T at a minimum
 # else
 #  error Unknown or unsupported ARM architecture
 # endif
#endif

-/* experimental feature used to check that our ARMv4 workarounds
- * work correctly without a real ARMv4 machine */
-#ifdef BIONIC_EXPERIMENTAL_FORCE_ARMV4
-# undef __ARM_ARCH__
-# define __ARM_ARCH__ 4
-#endif
-
-/* define __ARM_HAVE_5TE if we have the ARMv5TE instructions */
-#if __ARM_ARCH__ > 5
-# define __ARM_HAVE_5TE 1
-#elif __ARM_ARCH__ == 5
-# if defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__
-# define __ARM_HAVE_5TE 1
-# endif
-#endif
-
-/* instructions introduced in ARMv5 */
-#if __ARM_ARCH__ >= 5
-# define __ARM_HAVE_BLX 1
-# define __ARM_HAVE_CLZ 1
-# define __ARM_HAVE_LDC2 1
-# define __ARM_HAVE_MCR2 1
-# define __ARM_HAVE_MRC2 1
-# define __ARM_HAVE_STC2 1
-#endif
-
-/* ARMv5TE introduces a few instructions */
-#if __ARM_HAVE_5TE
-# define __ARM_HAVE_PLD 1
-# define __ARM_HAVE_MCRR 1
-# define __ARM_HAVE_MRRC 1
-#endif
-
 /* define __ARM_HAVE_HALFWORD_MULTIPLY when half-word multiply instructions
  * this means variants of: smul, smulw, smla, smlaw, smlal
  */
-#if __ARM_HAVE_5TE
-# define __ARM_HAVE_HALFWORD_MULTIPLY 1
-#endif
-
-/* define __ARM_HAVE_PAIR_LOAD_STORE when 64-bit memory loads and stored
- * into/from a pair of 32-bit registers is supported throuhg 'ldrd' and 'strd'
- */
-#if __ARM_HAVE_5TE
-# define __ARM_HAVE_PAIR_LOAD_STORE 1
-#endif
-
-/* define __ARM_HAVE_SATURATED_ARITHMETIC is you have the saturated integer
- * arithmetic instructions: qdd, qdadd, qsub, qdsub
- */
-#if __ARM_HAVE_5TE
-# define __ARM_HAVE_SATURATED_ARITHMETIC 1
-#endif
-
-/* define __ARM_HAVE_PC_INTERWORK when a direct assignment to the
- * pc register will switch into thumb/ARM mode depending on bit 0
- * of the new instruction address. Before ARMv5, this was not the
- * case, and you have to write:
- *
- *     mov r0, []
- *     bx r0
- *
- * instead of:
- *
- *     ldr pc, []
- *
- * note that this affects any instruction that explicitly changes the
- * value of the pc register, including ldm { ...,pc } or 'add pc, #offset'
- */
-#if __ARM_ARCH__ >= 5
-# define __ARM_HAVE_PC_INTERWORK
-#endif
-
-/* define __ARM_HAVE_LDREX_STREX for ARMv6 and ARMv7 architecture to be
- * used in replacement of deprecated swp instruction
- */
-#if __ARM_ARCH__ >= 6
-# define __ARM_HAVE_LDREX_STREX
-#endif
-
-/* define __ARM_HAVE_DMB for ARMv7 architecture
- */
-#if __ARM_ARCH__ >= 7
-# define __ARM_HAVE_DMB
-#endif
+#define __ARM_HAVE_HALFWORD_MULTIPLY 1

 /* define __ARM_HAVE_LDREXD for ARMv7 architecture
  * (also present in ARMv6K, but not implemented in ARMv7-M, neither of which
  * we care about)
@@ -184,18 +80,4 @@
 # define __ARM_HAVE_NEON
 #endif

-/* Assembly-only macros */
-#ifdef __ASSEMBLY__
-
-/* define a handy PLD(address) macro since the cache preload
- * is an optional opcode
- */
-#if __ARM_HAVE_PLD
-# define PLD(reg,offset) pld [reg, offset]
-#else
-# define PLD(reg,offset) /* nothing */
-#endif
-
-#endif /* ! __ASSEMBLY__ */
-
 #endif /* _ARM_MACHINE_CPU_FEATURES_H */
diff --git a/libc/arch-arm/include/machine/endian.h b/libc/arch-arm/include/machine/endian.h
index 7cba3b942..8d9723d7a 100644
--- a/libc/arch-arm/include/machine/endian.h
+++ b/libc/arch-arm/include/machine/endian.h
@@ -33,15 +33,6 @@
 #ifdef __GNUC__

-/*
- * REV and REV16 weren't available on ARM5 or ARM4.
- * We don't include because it pollutes the
- * namespace with macros like PLD.
- */
-#if !defined __ARM_ARCH_5__ && !defined __ARM_ARCH_5T__ && \
-    !defined __ARM_ARCH_5TE__ && !defined __ARM_ARCH_5TEJ__ && \
-    !defined __ARM_ARCH_4T__ && !defined __ARM_ARCH_4__
-
 /* According to RealView Assembler User's Guide, REV and REV16 are available
  * in Thumb code and 16-bit instructions when used in Thumb-2 code.
  *
@@ -55,13 +46,13 @@
  */
 #define __swap16md(x) ({ \
     register u_int16_t _x = (x); \
-    __asm volatile ("rev16 %0, %0" : "+l" (_x)); \
+    __asm__ __volatile__("rev16 %0, %0" : "+l" (_x)); \
     _x; \
 })

 #define __swap32md(x) ({ \
     register u_int32_t _x = (x); \
-    __asm volatile ("rev %0, %0" : "+l" (_x)); \
+    __asm__ __volatile__("rev %0, %0" : "+l" (_x)); \
     _x; \
 })
@@ -74,7 +65,6 @@
 /* Tell sys/endian.h we have MD variants of the swap macros. */
 #define MD_SWAP

-#endif /* __ARM_ARCH__ */
 #endif /* __GNUC__ */

 #if defined(__ARMEB__)
diff --git a/libc/private/bionic_atomic_arm.h b/libc/private/bionic_atomic_arm.h
index 3bb639e0e..e94129ca6 100644
--- a/libc/private/bionic_atomic_arm.h
+++ b/libc/private/bionic_atomic_arm.h
@@ -82,47 +82,21 @@
  */
 #if defined(ANDROID_SMP) && ANDROID_SMP == 1

-/* Sanity check, multi-core is only supported starting from ARMv6 */
-# if __ARM_ARCH__ < 6
-# error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6
-# endif
-
-# ifdef __ARM_HAVE_DMB
 /* For ARMv7-A, we can use the 'dmb' instruction directly */
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier(void)
-{
+__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
     /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
      * bother with __ATOMIC_SWITCH_TO_ARM */
     __asm__ __volatile__ ( "dmb" : : : "memory" );
 }
-# else /* !__ARM_HAVE_DMB */
-/* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor,
- * which requires the use of a general-purpose register, which is slightly
- * less efficient.
- */
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier(void)
-{
-    __asm__ __volatile__ (
-        __SWITCH_TO_ARM
-        "mcr p15, 0, %0, c7, c10, 5"
-        __SWITCH_TO_THUMB
-        : : "r" (0) : __ATOMIC_CLOBBERS "memory");
-}
-# endif /* !__ARM_HAVE_DMB */
+
 #else /* !ANDROID_SMP */
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier(void)
-{
+
+__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
     /* A simple compiler barrier */
     __asm__ __volatile__ ( "" : : : "memory" );
 }
-#endif /* !ANDROID_SMP */

-#ifndef __ARM_HAVE_LDREX_STREX
-#error Only ARM devices which have LDREX / STREX are supported
-#endif
+#endif /* !ANDROID_SMP */

 /* Compare-and-swap, without any explicit barriers. Note that this functions
  * returns 0 on success, and 1 on failure. The opposite convention is typically
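
For context beyond the patch itself: the comment above (the patch text ends mid-sentence) describes a compare-and-swap primitive that returns 0 on success and 1 on failure and carries no implicit barriers. A minimal sketch of how a caller typically pairs such a primitive with the __bionic_memory_barrier() shown above; the __bionic_cmpxchg() name and signature are assumed here for illustration and are not taken from the patch text:

/* Illustrative sketch only -- not part of the patch. Assumes a
 * cmpxchg-style helper with the 0-on-success / 1-on-failure convention
 * described in the comment above. */
#include <stdint.h>

extern int __bionic_cmpxchg(int32_t old_value, int32_t new_value,
                            volatile int32_t* ptr);   /* assumed signature */
extern void __bionic_memory_barrier(void);

static int32_t atomic_fetch_add_one(volatile int32_t* ptr) {
    int32_t old_value;
    do {
        old_value = *ptr;
        /* Retry until no other thread changed *ptr between the load and
         * the swap: a return value of 0 means the swap succeeded. */
    } while (__bionic_cmpxchg(old_value, old_value + 1, ptr) != 0);
    /* The primitive itself provides no ordering, so issue a barrier
     * explicitly where acquire/release semantics are needed. */
    __bionic_memory_barrier();
    return old_value;
}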