/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef BIONIC_ATOMIC_ARM_H
#define BIONIC_ATOMIC_ARM_H

#include <machine/cpu-features.h>

/* Some of the hardware instructions used below are not available in Thumb-1
 * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
 * problem, we're going to use the same technique as libatomic_ops,
 * which is to temporarily switch to ARM, do the operation, then switch
 * back to Thumb-1.
 *
 * This results in two 'bx' jumps, just like a normal function call, but
 * everything is kept inlined, avoids loading or computing the function's
 * address, and prevents a little I-cache thrashing too.
 *
 * However, it is highly recommended to avoid compiling any C library source
 * file that uses these functions in Thumb-1 mode.
 *
 * Define three helper macros to implement this:
 */
#if defined(__thumb__) && !defined(__thumb2__)
#  define __ATOMIC_SWITCH_TO_ARM \
            "adr r3, 5f\n" \
            "bx  r3\n" \
            ".align\n" \
            ".arm\n" \
        "5:\n"
/* note: the leading \n below is intentional */
#  define __ATOMIC_SWITCH_TO_THUMB \
            "\n" \
            "adr r3, 6f\n" \
            "bx  r3\n" \
            ".thumb\n" \
        "6:\n"

#  define __ATOMIC_CLOBBERS "r3"  /* list of clobbered registers */

/* Warn the user that ARM mode should really be preferred! */
#  warning Rebuilding this source file in ARM mode is highly recommended for performance!!

#else
#  define __ATOMIC_SWITCH_TO_ARM   /* nothing */
#  define __ATOMIC_SWITCH_TO_THUMB /* nothing */
#  define __ATOMIC_CLOBBERS        /* nothing */
#endif
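
/* Usage sketch (illustrative only): the inline-asm blocks below bracket their
 * ARM-only instructions with these macros and report the scratch register the
 * macros use (r3) through __ATOMIC_CLOBBERS, roughly like this:
 *
 *   __asm__ __volatile__ (
 *       __ATOMIC_SWITCH_TO_ARM
 *       "ldrex %0, [%1]\n"          @ ARM-only instruction
 *       __ATOMIC_SWITCH_TO_THUMB
 *       : "=&r" (value) : "r" (ptr) : __ATOMIC_CLOBBERS "memory");
 */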

/* Define a full memory barrier; this is only needed if we build the
 * platform for a multi-core device. For the record, using a 'dmb'
 * instruction on a Nexus One device can take up to 180 ns even if
 * it is completely unnecessary on this device.
 *
 * NOTE: This is where the platform and NDK atomic headers are
 *       going to diverge. With the NDK, we don't know if the generated
 *       code is going to run on a single or multi-core device, so we
 *       need to be cautious.
 *
 *       Fortunately, we can use the kernel helper function that is
 *       mapped at address 0xffff0fa0 in every user process, and that
 *       provides a device-specific barrier operation.
 *
 *       I.e. on single-core devices, the helper immediately returns;
 *       on multi-core devices, it uses "dmb" or any other means to
 *       perform a full memory barrier.
 *
 * There are three cases to consider for the platform:
 *
 *   - multi-core ARMv7-A      => use the 'dmb' hardware instruction
 *   - multi-core ARMv6        => use the coprocessor
 *   - single core ARMv5TE/6/7 => do not use any hardware barrier
 */
#if defined(ANDROID_SMP) && ANDROID_SMP == 1

/* Sanity check: multi-core is only supported starting from ARMv6. */
#  if __ARM_ARCH__ < 6
#    error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6
#  endif

#  ifdef __ARM_HAVE_DMB
/* For ARMv7-A, we can use the 'dmb' instruction directly. */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
    /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
     * bother with __ATOMIC_SWITCH_TO_ARM. */
    __asm__ __volatile__ ( "dmb" : : : "memory" );
}
#  else /* !__ARM_HAVE_DMB */
/* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor,
 * which requires the use of a general-purpose register, which is slightly
 * less efficient.
 */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
    __asm__ __volatile__ (
        __ATOMIC_SWITCH_TO_ARM
        "mcr p15, 0, %0, c7, c10, 5"
        __ATOMIC_SWITCH_TO_THUMB
        : : "r" (0) : __ATOMIC_CLOBBERS "memory");
}
#  endif /* !__ARM_HAVE_DMB */
#else /* !ANDROID_SMP */
__ATOMIC_INLINE__ void
__bionic_memory_barrier(void)
{
    /* A simple compiler barrier. */
    __asm__ __volatile__ ( "" : : : "memory" );
}
#endif /* !ANDROID_SMP */
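
/* Usage sketch (illustrative; 'shared_data', 'compute' and 'data_ready' are
 * hypothetical names, not part of this header): publishing data to another
 * core needs a barrier between the payload store and the "ready" flag store,
 * and the reader needs a matching barrier after it observes the flag:
 *
 *   shared_data = compute();            @ hypothetical payload
 *   __bionic_memory_barrier();          @ order the payload before the flag
 *   data_ready = 1;
 */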

/* Compare-and-swap, without any explicit barriers. Note that this function
 * returns 0 on success, and 1 on failure. The opposite convention is typically
 * used on other platforms.
 *
 * There are two cases to consider:
 *
 *   - ARMv6+  => use LDREX/STREX instructions
 *   - < ARMv6 => use the kernel helper function mapped at 0xffff0fc0
 *
 * LDREX/STREX are only available starting from ARMv6.
 */
#ifdef __ARM_HAVE_LDREX_STREX
__ATOMIC_INLINE__ int
__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%3]\n"
            "mov %1, #0\n"
            "teq %0, %4\n"
#ifdef __thumb2__
            "it eq\n"
#endif
            "strexeq %1, %5, [%3]"
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (status), "+m"(*ptr)
            : "r" (ptr), "Ir" (old_value), "r" (new_value)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));
    return prev != old_value;
}
#else /* !__ARM_HAVE_LDREX_STREX */

/* Use the handy kernel helper function mapped at 0xffff0fc0. */
typedef int (kernel_cmpxchg)(int32_t, int32_t, volatile int32_t *);

__ATOMIC_INLINE__ int
__kernel_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    /* Note: the kernel function returns 0 on success too. */
    return (*(kernel_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr);
}

__ATOMIC_INLINE__ int
__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    return __kernel_cmpxchg(old_value, new_value, ptr);
}
#endif /* !__ARM_HAVE_LDREX_STREX */
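
/* Usage sketch (illustrative): since __bionic_cmpxchg() returns 0 on success
 * and 1 on failure, a caller-side atomic add can be built on top of it with
 * the usual retry loop:
 *
 *   int32_t old;
 *   do {
 *       old = *ptr;
 *   } while (__bionic_cmpxchg(old, old + value, ptr) != 0);
 */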

/* Swap operation, without any explicit barriers.
 * There are again two similar cases to consider:
 *
 *   - ARMv6+  => use LDREX/STREX
 *   - < ARMv6 => use SWP instead.
 */
#ifdef __ARM_HAVE_LDREX_STREX
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%3]\n"
            "strex %1, %4, [%3]"
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (status), "+m" (*ptr)
            : "r" (ptr), "r" (new_value)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#else /* !__ARM_HAVE_LDREX_STREX */
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev;
    /* NOTE: SWP is available in Thumb-1 too. */
    __asm__ __volatile__ ("swp %0, %2, [%3]"
                          : "=&r" (prev), "+m" (*ptr)
                          : "r" (new_value), "r" (ptr)
                          : "cc");
    return prev;
}
#endif /* !__ARM_HAVE_LDREX_STREX */
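
/* Usage sketch (illustrative; 'lock' is a hypothetical int32_t that holds 0
 * when free and 1 when taken): __bionic_swap() can implement a trivial
 * test-and-set spinlock:
 *
 *   while (__bionic_swap(1, &lock) != 0)
 *       ;                           @ spin until the previous value was 0
 *   __bionic_memory_barrier();      @ acquire ordering after taking the lock
 */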

/* Atomic increment - without any barriers.
 * This returns the old value.
 */
#ifdef __ARM_HAVE_LDREX_STREX
__ATOMIC_INLINE__ int32_t
__bionic_atomic_inc(volatile int32_t* ptr)
{
    int32_t prev, tmp, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%4]\n"
            "add %1, %0, #1\n"
            "strex %2, %1, [%4]"
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
            : "r" (ptr)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#else
__ATOMIC_INLINE__ int32_t
__bionic_atomic_inc(volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        prev = *ptr;
        status = __kernel_cmpxchg(prev, prev+1, ptr);
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#endif
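
/* Usage sketch (illustrative; 'next_ticket' is a hypothetical counter):
 * because the old value is returned, each caller observes a distinct value,
 * so the helper can hand out unique ticket numbers:
 *
 *   int32_t my_ticket = __bionic_atomic_inc(&next_ticket);
 */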

/* Atomic decrement - without any barriers.
 * This returns the old value.
 */
#ifdef __ARM_HAVE_LDREX_STREX
__ATOMIC_INLINE__ int32_t
__bionic_atomic_dec(volatile int32_t* ptr)
{
    int32_t prev, tmp, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%4]\n"
            "sub %1, %0, #1\n"
            "strex %2, %1, [%4]"
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
            : "r" (ptr)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#else
__ATOMIC_INLINE__ int32_t
__bionic_atomic_dec(volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        prev = *ptr;
        status = __kernel_cmpxchg(prev, prev-1, ptr);
    } while (__builtin_expect(status != 0, 0));
    return prev;
}
#endif
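
/* Usage sketch (illustrative; 'obj', 'ref_count' and 'destroy' are
 * hypothetical): a reference-count release, where the caller that sees the
 * old value 1 knows it just dropped the last reference:
 *
 *   if (__bionic_atomic_dec(&obj->ref_count) == 1) {
 *       __bionic_memory_barrier();   @ order earlier accesses before teardown
 *       destroy(obj);
 *   }
 */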

#endif /* BIONIC_ATOMIC_ARM_H */