Merge "Clean up the pthread-only atomic stuff a little."

commit 341b55a1e6
Elliott Hughes, 2013-12-16 18:21:01 +00:00 (committed by Gerrit Code Review)
5 changed files with 125 additions and 300 deletions

View File

@@ -16,172 +16,64 @@
 #ifndef BIONIC_ATOMIC_ARM_H
 #define BIONIC_ATOMIC_ARM_H
 
-#include <machine/cpu-features.h>
-
-/* Some of the harware instructions used below are not available in Thumb-1
- * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
- * problem, we're going to use the same technique than libatomics_ops,
- * which is to temporarily switch to ARM, do the operation, then switch
- * back to Thumb-1.
- *
- * This results in two 'bx' jumps, just like a normal function call, but
- * everything is kept inlined, avoids loading or computing the function's
- * address, and prevents a little I-cache trashing too.
- *
- * However, it is highly recommended to avoid compiling any C library source
- * file that use these functions in Thumb-1 mode.
- *
- * Define three helper macros to implement this:
- */
-#if defined(__thumb__) && !defined(__thumb2__)
-#  define __ATOMIC_SWITCH_TO_ARM \
-            "adr r3, 5f\n" \
-            "bx r3\n" \
-            ".align\n" \
-            ".arm\n" \
-        "5:\n"
-/* note: the leading \n below is intentional */
-#  define __ATOMIC_SWITCH_TO_THUMB \
-            "\n" \
-            "adr r3, 6f\n" \
-            "bx r3\n" \
-            ".thumb" \
-        "6:\n"
-
-#  define __ATOMIC_CLOBBERS "r3" /* list of clobbered registers */
-
-/* Warn the user that ARM mode should really be preferred! */
-#  warning Rebuilding this source file in ARM mode is highly recommended for performance!!
-#else
-#  define __ATOMIC_SWITCH_TO_ARM   /* nothing */
-#  define __ATOMIC_SWITCH_TO_THUMB /* nothing */
-#  define __ATOMIC_CLOBBERS        /* nothing */
-#endif
-
-/* Define a full memory barrier, this is only needed if we build the
- * platform for a multi-core device. For the record, using a 'dmb'
- * instruction on a Nexus One device can take up to 180 ns even if
- * it is completely un-necessary on this device.
- *
- * NOTE: This is where the platform and NDK headers atomic headers are
- *       going to diverge. With the NDK, we don't know if the generated
- *       code is going to run on a single or multi-core device, so we
- *       need to be cautious.
- *
- *       I.e. on single-core devices, the helper immediately returns,
- *       on multi-core devices, it uses "dmb" or any other means to
- *       perform a full-memory barrier.
- *
- * There are three cases to consider for the platform:
- *
- *   - multi-core ARMv7-A  => use the 'dmb' hardware instruction
- *   - multi-core ARMv6    => use the coprocessor
- *   - single core ARMv6+  => do not use any hardware barrier
- */
+__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
 #if defined(ANDROID_SMP) && ANDROID_SMP == 1
-/* For ARMv7-A, we can use the 'dmb' instruction directly */
-__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
-    /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
-     * bother with __ATOMIC_SWITCH_TO_ARM */
-    __asm__ __volatile__ ( "dmb" : : : "memory" );
-}
-#else /* !ANDROID_SMP */
-__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
-    /* A simple compiler barrier */
-    __asm__ __volatile__ ( "" : : : "memory" );
-}
-#endif /* !ANDROID_SMP */
+  __asm__ __volatile__ ( "dmb" : : : "memory" );
+#else
+  /* A simple compiler barrier. */
+  __asm__ __volatile__ ( "" : : : "memory" );
+#endif
+}
 
 /* Compare-and-swap, without any explicit barriers. Note that this functions
  * returns 0 on success, and 1 on failure. The opposite convention is typically
  * used on other platforms.
  */
-__ATOMIC_INLINE__ int
-__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t prev, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%3]\n"
-            "mov %1, #0\n"
-            "teq %0, %4\n"
+__ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
+  int32_t prev, status;
+  do {
+    __asm__ __volatile__ (
+          "ldrex %0, [%3]\n"
+          "mov %1, #0\n"
+          "teq %0, %4\n"
 #ifdef __thumb2__
-            "it eq\n"
+          "it eq\n"
 #endif
-            "strexeq %1, %5, [%3]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (status), "+m"(*ptr)
-            : "r" (ptr), "Ir" (old_value), "r" (new_value)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev != old_value;
+          "strexeq %1, %5, [%3]"
+          : "=&r" (prev), "=&r" (status), "+m"(*ptr)
+          : "r" (ptr), "Ir" (old_value), "r" (new_value)
+          : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev != old_value;
 }
 
-/* Swap operation, without any explicit barriers. */
-__ATOMIC_INLINE__ int32_t
-__bionic_swap(int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t prev, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%3]\n"
-            "strex %1, %4, [%3]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (status), "+m" (*ptr)
-            : "r" (ptr), "r" (new_value)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
+/* Swap, without any explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t* ptr) {
+  int32_t prev, status;
+  do {
+    __asm__ __volatile__ (
+          "ldrex %0, [%3]\n"
+          "strex %1, %4, [%3]"
+          : "=&r" (prev), "=&r" (status), "+m" (*ptr)
+          : "r" (ptr), "r" (new_value)
+          : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev;
 }
 
-/* Atomic increment - without any barriers
- * This returns the old value
- */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_inc(volatile int32_t* ptr)
-{
-    int32_t prev, tmp, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%4]\n"
-            "add %1, %0, #1\n"
-            "strex %2, %1, [%4]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
-            : "r" (ptr)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
-}
-
-/* Atomic decrement - without any barriers
- * This returns the old value.
- */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_dec(volatile int32_t* ptr)
-{
-    int32_t prev, tmp, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%4]\n"
-            "sub %1, %0, #1\n"
-            "strex %2, %1, [%4]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
-            : "r" (ptr)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
+/* Atomic decrement, without explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) {
+  int32_t prev, tmp, status;
+  do {
+    __asm__ __volatile__ (
+          "ldrex %0, [%4]\n"
+          "sub %1, %0, #1\n"
+          "strex %2, %1, [%4]"
+          : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+          : "r" (ptr)
+          : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev;
 }
 
 #endif /* SYS_ATOMICS_ARM_H */
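
The comment above calls out that __bionic_cmpxchg returns 0 on success and 1 on failure, the reverse of most platforms. A minimal sketch of the caller-side retry pattern that convention leads to, using a hypothetical my_cmpxchg stand-in built on the same GCC builtin as the fallback header (illustrative only, not bionic code):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for __bionic_cmpxchg with the documented contract:
 * 0 on success, non-zero on failure. */
static int my_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
  return __sync_val_compare_and_swap(ptr, old_value, new_value) != old_value;
}

int main(void) {
  volatile int32_t counter = 41;
  int32_t seen;
  /* Caller-side retry loop: re-read, attempt the swap, repeat while it fails. */
  do {
    seen = counter;
  } while (my_cmpxchg(seen, seen + 1, &counter) != 0);
  printf("counter = %d\n", (int)counter); /* prints 42 */
  return 0;
}

A non-zero result just means another writer got there first; the loop re-reads and tries again until the exchange observes the value it expected.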

View File

@@ -16,46 +16,35 @@
 #ifndef BIONIC_ATOMIC_GCC_BUILTIN_H
 #define BIONIC_ATOMIC_GCC_BUILTIN_H
 
-/* This header file is used by default if we don't have optimized atomic
+/*
+ * This header file is used by default if we don't have optimized atomic
  * routines for a given platform. See bionic_atomic_arm.h and
  * bionic_atomic_x86.h for examples.
+ *
+ * Note that the GCC builtins include barriers that aren't present in
+ * the architecture-specific assembler versions.
  */
 
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier(void)
-{
-    __sync_synchronize();
+__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
+  __sync_synchronize();
 }
 
-__ATOMIC_INLINE__ int
-__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
-    /* We must return 0 on success */
-    return __sync_val_compare_and_swap(ptr, old_value, new_value) != old_value;
+__ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
+  /* We must return 0 on success. */
+  return __sync_val_compare_and_swap(ptr, old_value, new_value) != old_value;
 }
 
-__ATOMIC_INLINE__ int32_t
-__bionic_swap(int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t old_value;
-    do {
-        old_value = *ptr;
-    } while (__sync_val_compare_and_swap(ptr, old_value, new_value) != old_value);
-    return old_value;
+__ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t* ptr) {
+  int32_t old_value;
+  do {
+    old_value = *ptr;
+  } while (__sync_val_compare_and_swap(ptr, old_value, new_value) != old_value);
+  return old_value;
 }
 
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_inc(volatile int32_t* ptr)
-{
-    /* We must return the old value */
-    return __sync_fetch_and_add(ptr, 1);
-}
-
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_dec(volatile int32_t* ptr)
-{
-    /* We must return the old value */
-    return __sync_fetch_and_add(ptr, -1);
+__ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) {
+  /* We must return the old value. */
+  return __sync_fetch_and_add(ptr, -1);
 }
 
 #endif /* BIONIC_ATOMIC_GCC_BUILTIN_H */
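
Since the fallback maps __bionic_atomic_dec straight onto __sync_fetch_and_add(ptr, -1), the "return the old value" contract is easy to check in isolation. A tiny self-contained demo (not part of bionic):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  volatile int32_t value = 10;
  /* __sync_fetch_and_add returns the value held *before* the addition,
   * which is exactly the "old value" contract the header documents. */
  int32_t old = __sync_fetch_and_add(&value, -1);
  printf("old=%d new=%d\n", (int)old, (int)value); /* old=10 new=9 */
  return 0;
}

As the comment added in this change notes, the __sync_* builtins also act as full barriers, which the hand-written assembler versions deliberately omit.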

View File

@@ -23,11 +23,6 @@
  * memory barrier needs to be issued inline rather than as a function
  * call.
  *
- * Most code should not use these.
- *
- * Anything that does include this file must set ANDROID_SMP to either
- * 0 or 1, indicating compilation for UP or SMP, respectively.
- *
  * Macros defined in this header:
  *
  * void ANDROID_MEMBAR_FULL(void)
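
The surviving comment documents ANDROID_MEMBAR_FULL(), a full memory barrier issued inline. A sketch of the publish/consume pattern such a barrier exists for, with a hypothetical MY_MEMBAR_FULL stand-in spelled with the same __sync_synchronize() builtin the fallback atomics header uses (single-threaded here, so it runs trivially):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for a full-barrier macro in the ANDROID_MEMBAR_FULL style. */
#define MY_MEMBAR_FULL() __sync_synchronize()

static int32_t payload;
static volatile int32_t ready;

static void publish(int32_t value) {
  payload = value;
  MY_MEMBAR_FULL(); /* make the payload visible before the flag is raised */
  ready = 1;
}

int main(void) {
  publish(123);
  if (ready) {
    MY_MEMBAR_FULL(); /* a reader pairs the barrier before touching the payload */
    printf("%d\n", (int)payload);
  }
  return 0;
}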

View File

@@ -19,84 +19,58 @@
 /* Define a full memory barrier, this is only needed if we build the
  * platform for a multi-core device.
  */
+__ATOMIC_INLINE__ void __bionic_memory_barrier() {
 #if defined(ANDROID_SMP) && ANDROID_SMP == 1
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier()
-{
-    __asm__ __volatile__ ( "sync" : : : "memory" );
-}
+  __asm__ __volatile__ ( "sync" : : : "memory" );
 #else
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier()
-{
-    /* A simple compiler barrier */
-    __asm__ __volatile__ ( "" : : : "memory" );
-}
+  /* A simple compiler barrier. */
+  __asm__ __volatile__ ( "" : : : "memory" );
 #endif
+}
 
 /* Compare-and-swap, without any explicit barriers. Note that this function
  * returns 0 on success, and 1 on failure. The opposite convention is typically
  * used on other platforms.
  */
-__ATOMIC_INLINE__ int
-__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t prev, status;
-    __asm__ __volatile__ ("1: move %[status], %[new_value]  \n"
-                          "   ll %[prev], 0(%[ptr])         \n"
-                          "   bne %[old_value], %[prev], 2f \n"
-                          "   sc %[status], 0(%[ptr])       \n"
-                          "   beqz %[status], 1b            \n"
-                          "2:                               \n"
-                          : [prev]"=&r"(prev), [status]"=&r"(status), "+m"(*ptr)
-                          : [new_value]"r"(new_value), [old_value]"r"(old_value), [ptr]"r"(ptr)
-                          : "memory");
-    return prev != old_value;
+__ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
+  int32_t prev, status;
+  __asm__ __volatile__ ("1: move %[status], %[new_value]  \n"
+                        "   ll %[prev], 0(%[ptr])         \n"
+                        "   bne %[old_value], %[prev], 2f \n"
+                        "   sc %[status], 0(%[ptr])       \n"
+                        "   beqz %[status], 1b            \n"
+                        "2:                               \n"
+                        : [prev]"=&r"(prev), [status]"=&r"(status), "+m"(*ptr)
+                        : [new_value]"r"(new_value), [old_value]"r"(old_value), [ptr]"r"(ptr)
+                        : "memory");
+  return prev != old_value;
 }
 
-/* Swap, without any explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_swap(int32_t new_value, volatile int32_t *ptr)
-{
-    int32_t prev, status;
-    __asm__ __volatile__ ("1: move %[status], %[new_value] \n"
-                          "   ll %[prev], 0(%[ptr])        \n"
-                          "   sc %[status], 0(%[ptr])      \n"
-                          "   beqz %[status], 1b           \n"
-                          : [prev]"=&r"(prev), [status]"=&r"(status), "+m"(*ptr)
-                          : [ptr]"r"(ptr), [new_value]"r"(new_value)
-                          : "memory");
-    return prev;
+/* Swap, without any explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t* ptr) {
+  int32_t prev, status;
+  __asm__ __volatile__ ("1: move %[status], %[new_value] \n"
+                        "   ll %[prev], 0(%[ptr])        \n"
+                        "   sc %[status], 0(%[ptr])      \n"
+                        "   beqz %[status], 1b           \n"
+                        : [prev]"=&r"(prev), [status]"=&r"(status), "+m"(*ptr)
+                        : [ptr]"r"(ptr), [new_value]"r"(new_value)
+                        : "memory");
+  return prev;
 }
 
-/* Atomic increment, without explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_inc(volatile int32_t *ptr)
-{
-    int32_t prev, status;
-    __asm__ __volatile__ ("1: ll %[prev], 0(%[ptr])        \n"
-                          "   addiu %[status], %[prev], 1  \n"
-                          "   sc %[status], 0(%[ptr])      \n"
-                          "   beqz %[status], 1b           \n"
-                          : [prev]"=&r" (prev), [status]"=&r"(status), "+m" (*ptr)
-                          : [ptr]"r"(ptr)
-                          : "memory");
-    return prev;
-}
-
-/* Atomic decrement, without explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_dec(volatile int32_t *ptr)
-{
-    int32_t prev, status;
-    __asm__ __volatile__ ("1: ll %[prev], 0(%[ptr])        \n"
-                          "   addiu %[status], %[prev], -1 \n"
-                          "   sc %[status], 0(%[ptr])      \n"
-                          "   beqz %[status], 1b           \n"
-                          : [prev]"=&r" (prev), [status]"=&r"(status), "+m" (*ptr)
-                          : [ptr]"r"(ptr)
-                          : "memory");
-    return prev;
+/* Atomic decrement, without explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) {
+  int32_t prev, status;
+  __asm__ __volatile__ ("1: ll %[prev], 0(%[ptr])        \n"
+                        "   addiu %[status], %[prev], -1 \n"
+                        "   sc %[status], 0(%[ptr])      \n"
+                        "   beqz %[status], 1b           \n"
+                        : [prev]"=&r" (prev), [status]"=&r"(status), "+m" (*ptr)
+                        : [ptr]"r"(ptr)
+                        : "memory");
+  return prev;
 }
 
 #endif /* BIONIC_ATOMIC_MIPS_H */
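
In the MIPS routines, sc writes 1 to its register when the conditional store succeeds and 0 when it fails, so "beqz %[status], 1b" simply retries the whole ll/sc sequence until the store wins. The same retry structure sketched in portable C, with a GCC CAS builtin standing in for the ll/sc pair (hypothetical helper, not bionic code):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the shape of an ll/sc retry loop, with a CAS builtin
 * playing the part of the load-linked/store-conditional pair. */
static int32_t swap_in(int32_t new_value, volatile int32_t* ptr) {
  int32_t prev;
  do {
    prev = *ptr;                                                 /* "ll" */
  } while (!__sync_bool_compare_and_swap(ptr, prev, new_value)); /* failed "sc" => retry */
  return prev;
}

int main(void) {
  volatile int32_t v = 5;
  printf("prev=%d now=%d\n", (int)swap_in(9, &v), (int)v); /* prev=5 now=9 */
  return 0;
}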

View File

@@ -19,28 +19,20 @@
 /* Define a full memory barrier, this is only needed if we build the
  * platform for a multi-core device.
  */
+__ATOMIC_INLINE__ void __bionic_memory_barrier() {
 #if defined(ANDROID_SMP) && ANDROID_SMP == 1
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier()
-{
-    __asm__ __volatile__ ( "mfence" : : : "memory" );
-}
+  __asm__ __volatile__ ( "mfence" : : : "memory" );
 #else
-__ATOMIC_INLINE__ void
-__bionic_memory_barrier()
-{
-    /* A simple compiler barrier */
-    __asm__ __volatile__ ( "" : : : "memory" );
-}
+  /* A simple compiler barrier. */
+  __asm__ __volatile__ ( "" : : : "memory" );
 #endif
+}
 
 /* Compare-and-swap, without any explicit barriers. Note that this function
  * returns 0 on success, and 1 on failure. The opposite convention is typically
  * used on other platforms.
  */
-__ATOMIC_INLINE__ int
-__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
+__ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
   int32_t prev;
   __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
                         : "=a" (prev)
@@ -49,40 +41,23 @@ __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
   return prev != old_value;
 }
 
-/* Swap, without any explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_swap(int32_t new_value, volatile int32_t *ptr)
-{
-    __asm__ __volatile__ ("xchgl %1, %0"
-                          : "=r" (new_value)
-                          : "m" (*ptr), "0" (new_value)
-                          : "memory");
-    return new_value;
+/* Swap, without any explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t *ptr) {
+  __asm__ __volatile__ ("xchgl %1, %0"
+                        : "=r" (new_value)
+                        : "m" (*ptr), "0" (new_value)
+                        : "memory");
+  return new_value;
 }
 
-/* Atomic increment, without explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_inc(volatile int32_t *ptr)
-{
-    int increment = 1;
-    __asm__ __volatile__ ("lock; xaddl %0, %1"
-                          : "+r" (increment), "+m" (*ptr)
-                          : : "memory");
-    /* increment now holds the old value of *ptr */
-    return increment;
-}
-
-/* Atomic decrement, without explicit barriers */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_dec(volatile int32_t *ptr)
-{
-    int increment = -1;
-    __asm__ __volatile__ ("lock; xaddl %0, %1"
-                          : "+r" (increment), "+m" (*ptr)
-                          : : "memory");
-    /* increment now holds the old value of *ptr */
-    return increment;
+/* Atomic decrement, without explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) {
+  int increment = -1;
+  __asm__ __volatile__ ("lock; xaddl %0, %1"
+                        : "+r" (increment), "+m" (*ptr)
+                        : : "memory");
+  /* increment now holds the old value of *ptr */
+  return increment;
 }
 
 #endif /* BIONIC_ATOMIC_X86_H */
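
__bionic_swap returns whatever the location held before the exchange (the x86 version reads it back from xchgl's output operand). One classic use of that contract is a test-and-set style lock word; a minimal hypothetical sketch, reusing the CAS-loop swap from the fallback header as a portable stand-in:

#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for __bionic_swap, in the spirit of the fallback header:
 * CAS until new_value is installed, then return the previous value. */
static int32_t my_swap(int32_t new_value, volatile int32_t* ptr) {
  int32_t old_value;
  do {
    old_value = *ptr;
  } while (__sync_val_compare_and_swap(ptr, old_value, new_value) != old_value);
  return old_value;
}

int main(void) {
  volatile int32_t lock = 0; /* 0 = free, 1 = held */
  /* Test-and-set: seeing the old value 0 means this caller took the lock.
   * A real lock would spin and pair this with barriers; this only shows
   * the return-value contract. */
  if (my_swap(1, &lock) == 0) {
    printf("lock acquired\n");
    my_swap(0, &lock); /* release (illustrative) */
  }
  return 0;
}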