Merge "Clean up the pthread-only atomic stuff a little."

commit 341b55a1e6
Elliott Hughes, 2013-12-16 18:21:01 +00:00 (committed by Gerrit Code Review)
5 changed files with 125 additions and 300 deletions

View File

@@ -16,172 +16,64 @@
 #ifndef BIONIC_ATOMIC_ARM_H
 #define BIONIC_ATOMIC_ARM_H
 
-#include <machine/cpu-features.h>
-
-/* Some of the harware instructions used below are not available in Thumb-1
- * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
- * problem, we're going to use the same technique than libatomics_ops,
- * which is to temporarily switch to ARM, do the operation, then switch
- * back to Thumb-1.
- *
- * This results in two 'bx' jumps, just like a normal function call, but
- * everything is kept inlined, avoids loading or computing the function's
- * address, and prevents a little I-cache trashing too.
- *
- * However, it is highly recommended to avoid compiling any C library source
- * file that use these functions in Thumb-1 mode.
- *
- * Define three helper macros to implement this:
- */
-#if defined(__thumb__) && !defined(__thumb2__)
-#  define __ATOMIC_SWITCH_TO_ARM \
-            "adr r3, 5f\n" \
-            "bx r3\n" \
-            ".align\n" \
-            ".arm\n" \
-        "5:\n"
-/* note: the leading \n below is intentional */
-#  define __ATOMIC_SWITCH_TO_THUMB \
-            "\n" \
-            "adr r3, 6f\n" \
-            "bx r3\n" \
-            ".thumb" \
-        "6:\n"
-
-#  define __ATOMIC_CLOBBERS "r3" /* list of clobbered registers */
-
-/* Warn the user that ARM mode should really be preferred! */
-#  warning Rebuilding this source file in ARM mode is highly recommended for performance!!
-#else
-#  define __ATOMIC_SWITCH_TO_ARM   /* nothing */
-#  define __ATOMIC_SWITCH_TO_THUMB /* nothing */
-#  define __ATOMIC_CLOBBERS        /* nothing */
-#endif
-
-/* Define a full memory barrier, this is only needed if we build the
- * platform for a multi-core device. For the record, using a 'dmb'
- * instruction on a Nexus One device can take up to 180 ns even if
- * it is completely un-necessary on this device.
- *
- * NOTE: This is where the platform and NDK headers atomic headers are
- *       going to diverge. With the NDK, we don't know if the generated
- *       code is going to run on a single or multi-core device, so we
- *       need to be cautious.
- *
- *       I.e. on single-core devices, the helper immediately returns,
- *       on multi-core devices, it uses "dmb" or any other means to
- *       perform a full-memory barrier.
- *
- * There are three cases to consider for the platform:
- *
- *   - multi-core ARMv7-A  => use the 'dmb' hardware instruction
- *   - multi-core ARMv6    => use the coprocessor
- *   - single core ARMv6+  => do not use any hardware barrier
- */
+__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
 #if defined(ANDROID_SMP) && ANDROID_SMP == 1
-/* For ARMv7-A, we can use the 'dmb' instruction directly */
-__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
-    /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
-     * bother with __ATOMIC_SWITCH_TO_ARM */
-    __asm__ __volatile__ ( "dmb" : : : "memory" );
-}
-#else /* !ANDROID_SMP */
-__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
-    /* A simple compiler barrier */
-    __asm__ __volatile__ ( "" : : : "memory" );
-}
-#endif /* !ANDROID_SMP */
+  __asm__ __volatile__ ( "dmb" : : : "memory" );
+#else
+  /* A simple compiler barrier. */
+  __asm__ __volatile__ ( "" : : : "memory" );
+#endif
+}
 
 /* Compare-and-swap, without any explicit barriers. Note that this functions
  * returns 0 on success, and 1 on failure. The opposite convention is typically
  * used on other platforms.
  */
-__ATOMIC_INLINE__ int
-__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t prev, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%3]\n"
-            "mov %1, #0\n"
-            "teq %0, %4\n"
+__ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
+  int32_t prev, status;
+  do {
+    __asm__ __volatile__ (
+          "ldrex %0, [%3]\n"
+          "mov %1, #0\n"
+          "teq %0, %4\n"
 #ifdef __thumb2__
-            "it eq\n"
+          "it eq\n"
 #endif
-            "strexeq %1, %5, [%3]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (status), "+m"(*ptr)
-            : "r" (ptr), "Ir" (old_value), "r" (new_value)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev != old_value;
+          "strexeq %1, %5, [%3]"
+          : "=&r" (prev), "=&r" (status), "+m"(*ptr)
+          : "r" (ptr), "Ir" (old_value), "r" (new_value)
+          : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev != old_value;
 }
 
-/* Swap operation, without any explicit barriers. */
-__ATOMIC_INLINE__ int32_t
-__bionic_swap(int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t prev, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%3]\n"
-            "strex %1, %4, [%3]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (status), "+m" (*ptr)
-            : "r" (ptr), "r" (new_value)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
+/* Swap, without any explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t* ptr) {
+  int32_t prev, status;
+  do {
+    __asm__ __volatile__ (
+          "ldrex %0, [%3]\n"
+          "strex %1, %4, [%3]"
+          : "=&r" (prev), "=&r" (status), "+m" (*ptr)
+          : "r" (ptr), "r" (new_value)
+          : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev;
 }
 
-/* Atomic increment - without any barriers
- * This returns the old value
- */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_inc(volatile int32_t* ptr)
-{
-    int32_t prev, tmp, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%4]\n"
-            "add %1, %0, #1\n"
-            "strex %2, %1, [%4]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
-            : "r" (ptr)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
-}
-
-/* Atomic decrement - without any barriers
- * This returns the old value.
- */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_dec(volatile int32_t* ptr)
-{
-    int32_t prev, tmp, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%4]\n"
-            "sub %1, %0, #1\n"
-            "strex %2, %1, [%4]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
-            : "r" (ptr)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
+/* Atomic decrement, without explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) {
+  int32_t prev, tmp, status;
+  do {
+    __asm__ __volatile__ (
+          "ldrex %0, [%4]\n"
+          "sub %1, %0, #1\n"
+          "strex %2, %1, [%4]"
+          : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+          : "r" (ptr)
+          : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev;
 }
 
 #endif /* SYS_ATOMICS_ARM_H */
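
The comment above calls out that __bionic_cmpxchg returns 0 on success and 1 on failure, the reverse of most platforms. A minimal sketch of the caller-side retry pattern that convention leads to, using a hypothetical my_cmpxchg stand-in built on the same GCC builtin as the fallback header (illustrative only, not bionic code):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for __bionic_cmpxchg with the documented contract:
 * 0 on success, non-zero on failure. */
static int my_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
  return __sync_val_compare_and_swap(ptr, old_value, new_value) != old_value;
}

int main(void) {
  volatile int32_t counter = 41;
  int32_t seen;
  /* Caller-side retry loop: re-read, attempt the swap, repeat while it fails. */
  do {
    seen = counter;
  } while (my_cmpxchg(seen, seen + 1, &counter) != 0);
  printf("counter = %d\n", (int)counter); /* prints 42 */
  return 0;
}

A non-zero result just means another writer got there first; the loop re-reads and tries again until the exchange observes the value it expected.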

View File

@@ -16,46 +16,35 @@
 #ifndef BIONIC_ATOMIC_GCC_BUILTIN_H
 #define BIONIC_ATOMIC_GCC_BUILTIN_H
 
-/* This header file is used by default if we don't have optimized atomic
+/*
+ * This header file is used by default if we don't have optimized atomic
  * routines for a given platform. See bionic_atomic_arm.h and
  * bionic_atomic_x86.h for examples.
+ *
+ * Note that the GCC builtins include barriers that aren't present in
+ * the architecture-specific assembler versions.
  */
 
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier(void)
-{
-    __sync_synchronize();
+__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
+  __sync_synchronize();
 }
 
-__ATOMIC_INLINE__ int
-__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
-    /* We must return 0 on success */
-    return __sync_val_compare_and_swap(ptr, old_value, new_value) != old_value;
+__ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
+  /* We must return 0 on success. */
+  return __sync_val_compare_and_swap(ptr, old_value, new_value) != old_value;
 }
 
-__ATOMIC_INLINE__ int32_t
-__bionic_swap(int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t old_value;
-    do {
-        old_value = *ptr;
-    } while (__sync_val_compare_and_swap(ptr, old_value, new_value) != old_value);
-    return old_value;
+__ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t* ptr) {
+  int32_t old_value;
+  do {
+    old_value = *ptr;
+  } while (__sync_val_compare_and_swap(ptr, old_value, new_value) != old_value);
+  return old_value;
 }
 
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_inc(volatile int32_t* ptr)
-{
-    /* We must return the old value */
-    return __sync_fetch_and_add(ptr, 1);
-}
-
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_dec(volatile int32_t* ptr)
-{
-    /* We must return the old value */
-    return __sync_fetch_and_add(ptr, -1);
+__ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) {
+  /* We must return the old value. */
+  return __sync_fetch_and_add(ptr, -1);
 }
 
 #endif /* BIONIC_ATOMIC_GCC_BUILTIN_H */
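
Since the fallback maps __bionic_atomic_dec straight onto __sync_fetch_and_add(ptr, -1), the "return the old value" contract is easy to check in isolation. A tiny self-contained demo (not part of bionic):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  volatile int32_t value = 10;
  /* __sync_fetch_and_add returns the value held *before* the addition,
   * which is exactly the "old value" contract the header documents. */
  int32_t old = __sync_fetch_and_add(&value, -1);
  printf("old=%d new=%d\n", (int)old, (int)value); /* old=10 new=9 */
  return 0;
}

As the comment added in this change notes, the __sync_* builtins also act as full barriers, which the hand-written assembler versions deliberately omit.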

View File

@@ -23,11 +23,6 @@
  * memory barrier needs to be issued inline rather than as a function
  * call.
  *
- * Most code should not use these.
- *
- * Anything that does include this file must set ANDROID_SMP to either
- * 0 or 1, indicating compilation for UP or SMP, respectively.
- *
  * Macros defined in this header:
  *
  * void ANDROID_MEMBAR_FULL(void)
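
The surviving comment documents ANDROID_MEMBAR_FULL(), a full memory barrier issued inline. A sketch of the publish/consume pattern such a barrier exists for, with a hypothetical MY_MEMBAR_FULL stand-in spelled with the same __sync_synchronize() builtin the fallback atomics header uses (single-threaded here, so it runs trivially):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for a full-barrier macro in the ANDROID_MEMBAR_FULL style. */
#define MY_MEMBAR_FULL() __sync_synchronize()

static int32_t payload;
static volatile int32_t ready;

static void publish(int32_t value) {
  payload = value;
  MY_MEMBAR_FULL(); /* make the payload visible before the flag is raised */
  ready = 1;
}

int main(void) {
  publish(123);
  if (ready) {
    MY_MEMBAR_FULL(); /* a reader pairs the barrier before touching the payload */
    printf("%d\n", (int)payload);
  }
  return 0;
}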

View File

@@ -19,84 +19,58 @@
 /* Define a full memory barrier, this is only needed if we build the
  * platform for a multi-core device.
  */
+__ATOMIC_INLINE__ void __bionic_memory_barrier() {
 #if defined(ANDROID_SMP) && ANDROID_SMP == 1
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier()
-{
-    __asm__ __volatile__ ( "sync" : : : "memory" );
-}
+  __asm__ __volatile__ ( "sync" : : : "memory" );
 #else
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier()
-{
-    /* A simple compiler barrier */
-    __asm__ __volatile__ ( "" : : : "memory" );
-}
+  /* A simple compiler barrier. */
+  __asm__ __volatile__ ( "" : : : "memory" );
 #endif
+}
 
 /* Compare-and-swap, without any explicit barriers. Note that this function
  * returns 0 on success, and 1 on failure. The opposite convention is typically
  * used on other platforms.
  */
-__ATOMIC_INLINE__ int
-__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t prev, status;
-    __asm__ __volatile__ ("1: move %[status], %[new_value]  \n"
-                          "   ll %[prev], 0(%[ptr])         \n"
-                          "   bne %[old_value], %[prev], 2f \n"
-                          "   sc %[status], 0(%[ptr])       \n"
-                          "   beqz %[status], 1b            \n"
-                          "2:                               \n"
-                          : [prev]"=&r"(prev), [status]"=&r"(status), "+m"(*ptr)
-                          : [new_value]"r"(new_value), [old_value]"r"(old_value), [ptr]"r"(ptr)
-                          : "memory");
-    return prev != old_value;
+__ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
+  int32_t prev, status;
+  __asm__ __volatile__ ("1: move %[status], %[new_value]  \n"
+                        "   ll %[prev], 0(%[ptr])         \n"
+                        "   bne %[old_value], %[prev], 2f \n"
+                        "   sc %[status], 0(%[ptr])       \n"
+                        "   beqz %[status], 1b            \n"
+                        "2:                               \n"
+                        : [prev]"=&r"(prev), [status]"=&r"(status), "+m"(*ptr)
+                        : [new_value]"r"(new_value), [old_value]"r"(old_value), [ptr]"r"(ptr)
+                        : "memory");
+  return prev != old_value;
 }
 
-/* Swap, without any explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_swap(int32_t new_value, volatile int32_t *ptr)
-{
-    int32_t prev, status;
-    __asm__ __volatile__ ("1: move %[status], %[new_value] \n"
-                          "   ll %[prev], 0(%[ptr])        \n"
-                          "   sc %[status], 0(%[ptr])      \n"
-                          "   beqz %[status], 1b           \n"
-                          : [prev]"=&r"(prev), [status]"=&r"(status), "+m"(*ptr)
-                          : [ptr]"r"(ptr), [new_value]"r"(new_value)
-                          : "memory");
-    return prev;
+/* Swap, without any explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t* ptr) {
+  int32_t prev, status;
+  __asm__ __volatile__ ("1: move %[status], %[new_value] \n"
+                        "   ll %[prev], 0(%[ptr])        \n"
+                        "   sc %[status], 0(%[ptr])      \n"
+                        "   beqz %[status], 1b           \n"
+                        : [prev]"=&r"(prev), [status]"=&r"(status), "+m"(*ptr)
+                        : [ptr]"r"(ptr), [new_value]"r"(new_value)
+                        : "memory");
+  return prev;
 }
 
-/* Atomic increment, without explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_inc(volatile int32_t *ptr)
-{
-    int32_t prev, status;
-    __asm__ __volatile__ ("1: ll %[prev], 0(%[ptr])        \n"
-                          "   addiu %[status], %[prev], 1  \n"
-                          "   sc %[status], 0(%[ptr])      \n"
-                          "   beqz %[status], 1b           \n"
-                          : [prev]"=&r" (prev), [status]"=&r"(status), "+m" (*ptr)
-                          : [ptr]"r"(ptr)
-                          : "memory");
-    return prev;
-}
-
-/* Atomic decrement, without explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_dec(volatile int32_t *ptr)
-{
-    int32_t prev, status;
-    __asm__ __volatile__ ("1: ll %[prev], 0(%[ptr])        \n"
-                          "   addiu %[status], %[prev], -1 \n"
-                          "   sc %[status], 0(%[ptr])      \n"
-                          "   beqz %[status], 1b           \n"
-                          : [prev]"=&r" (prev), [status]"=&r"(status), "+m" (*ptr)
-                          : [ptr]"r"(ptr)
-                          : "memory");
-    return prev;
+/* Atomic decrement, without explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) {
+  int32_t prev, status;
+  __asm__ __volatile__ ("1: ll %[prev], 0(%[ptr])        \n"
+                        "   addiu %[status], %[prev], -1 \n"
+                        "   sc %[status], 0(%[ptr])      \n"
+                        "   beqz %[status], 1b           \n"
+                        : [prev]"=&r" (prev), [status]"=&r"(status), "+m" (*ptr)
+                        : [ptr]"r"(ptr)
+                        : "memory");
+  return prev;
 }
 
 #endif /* BIONIC_ATOMIC_MIPS_H */
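
In the MIPS routines, sc writes 1 to its register when the conditional store succeeds and 0 when it fails, so "beqz %[status], 1b" simply retries the whole ll/sc sequence until the store wins. The same retry structure sketched in portable C, with a GCC CAS builtin standing in for the ll/sc pair (hypothetical helper, not bionic code):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the shape of an ll/sc retry loop, with a CAS builtin
 * playing the part of the load-linked/store-conditional pair. */
static int32_t swap_in(int32_t new_value, volatile int32_t* ptr) {
  int32_t prev;
  do {
    prev = *ptr;                                                 /* "ll" */
  } while (!__sync_bool_compare_and_swap(ptr, prev, new_value)); /* failed "sc" => retry */
  return prev;
}

int main(void) {
  volatile int32_t v = 5;
  printf("prev=%d now=%d\n", (int)swap_in(9, &v), (int)v); /* prev=5 now=9 */
  return 0;
}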

View File

@@ -19,28 +19,20 @@
 /* Define a full memory barrier, this is only needed if we build the
  * platform for a multi-core device.
  */
+__ATOMIC_INLINE__ void __bionic_memory_barrier() {
 #if defined(ANDROID_SMP) && ANDROID_SMP == 1
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier()
-{
-    __asm__ __volatile__ ( "mfence" : : : "memory" );
-}
+  __asm__ __volatile__ ( "mfence" : : : "memory" );
 #else
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier()
-{
-    /* A simple compiler barrier */
-    __asm__ __volatile__ ( "" : : : "memory" );
-}
+  /* A simple compiler barrier. */
+  __asm__ __volatile__ ( "" : : : "memory" );
 #endif
+}
 
 /* Compare-and-swap, without any explicit barriers. Note that this function
  * returns 0 on success, and 1 on failure. The opposite convention is typically
  * used on other platforms.
  */
-__ATOMIC_INLINE__ int
-__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
+__ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
   int32_t prev;
   __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
                         : "=a" (prev)
@@ -49,40 +41,23 @@ __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
   return prev != old_value;
 }
 
-/* Swap, without any explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_swap(int32_t new_value, volatile int32_t *ptr)
-{
-    __asm__ __volatile__ ("xchgl %1, %0"
-                          : "=r" (new_value)
-                          : "m" (*ptr), "0" (new_value)
-                          : "memory");
-    return new_value;
+/* Swap, without any explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t *ptr) {
+  __asm__ __volatile__ ("xchgl %1, %0"
+                        : "=r" (new_value)
+                        : "m" (*ptr), "0" (new_value)
+                        : "memory");
+  return new_value;
 }
 
-/* Atomic increment, without explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_inc(volatile int32_t *ptr)
-{
-    int increment = 1;
-    __asm__ __volatile__ ("lock; xaddl %0, %1"
-                          : "+r" (increment), "+m" (*ptr)
-                          : : "memory");
-    /* increment now holds the old value of *ptr */
-    return increment;
-}
-
-/* Atomic decrement, without explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_dec(volatile int32_t *ptr)
-{
-    int increment = -1;
-    __asm__ __volatile__ ("lock; xaddl %0, %1"
-                          : "+r" (increment), "+m" (*ptr)
-                          : : "memory");
-    /* increment now holds the old value of *ptr */
-    return increment;
+/* Atomic decrement, without explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) {
+  int increment = -1;
+  __asm__ __volatile__ ("lock; xaddl %0, %1"
+                        : "+r" (increment), "+m" (*ptr)
+                        : : "memory");
+  /* increment now holds the old value of *ptr */
+  return increment;
 }
 
 #endif /* BIONIC_ATOMIC_X86_H */
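
__bionic_swap returns whatever the location held before the exchange (the x86 version reads it back from xchgl's output operand). One classic use of that contract is a test-and-set style lock word; a minimal hypothetical sketch, reusing the CAS-loop swap from the fallback header as a portable stand-in:

#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for __bionic_swap, in the spirit of the fallback header:
 * CAS until new_value is installed, then return the previous value. */
static int32_t my_swap(int32_t new_value, volatile int32_t* ptr) {
  int32_t old_value;
  do {
    old_value = *ptr;
  } while (__sync_val_compare_and_swap(ptr, old_value, new_value) != old_value);
  return old_value;
}

int main(void) {
  volatile int32_t lock = 0; /* 0 = free, 1 = held */
  /* Test-and-set: seeing the old value 0 means this caller took the lock.
   * A real lock would spin and pair this with barriers; this only shows
   * the return-value contract. */
  if (my_swap(1, &lock) == 0) {
    printf("lock acquired\n");
    my_swap(0, &lock); /* release (illustrative) */
  }
  return 0;
}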