Implement atomic operations for armv7a

This commit implements atomic operations for the armv7a architecture
using gcc inline assembly.  This offers higher performance compared to
pthread mutexes.

Tested on an am3517 evm, clocked at 600MHz:

./inproc_thr 200 1000000
------------------------
53-60K messages / sec, pthread mutexes
73-90K messages / sec, assembly atomic ops

./inproc_lat 200 1000000
------------------------
average latency: 42.234 [us], pthread mutexes
average latency: 35.496 [us], assembly atomic ops
This commit is contained in:
Patrick Trantham 2012-04-13 10:48:15 -05:00
parent c120f02dc9
commit 135fdab0ec
2 changed files with 67 additions and 0 deletions

View File

@ -29,6 +29,8 @@
#define ZMQ_ATOMIC_COUNTER_MUTEX
#elif (defined __i386__ || defined __x86_64__) && defined __GNUC__
#define ZMQ_ATOMIC_COUNTER_X86
#elif defined __ARM_ARCH_7A__ && defined __GNUC__
#define ZMQ_ATOMIC_COUNTER_ARM
#elif defined ZMQ_HAVE_WINDOWS
#define ZMQ_ATOMIC_COUNTER_WINDOWS
#elif (defined ZMQ_HAVE_SOLARIS || defined ZMQ_HAVE_NETBSD)
@ -88,6 +90,19 @@ namespace zmq
: "=r" (old_value), "=m" (value)
: "0" (increment_), "m" (value)
: "cc", "memory");
#elif defined ZMQ_ATOMIC_COUNTER_ARM
integer_t flag, tmp;
__asm__ volatile (
" dmb sy\n\t"
"1: ldrex %0, [%5]\n\t"
" add %2, %0, %4\n\t"
" strex %1, %2, [%5]\n\t"
" teq %1, #0\n\t"
" bne 1b\n\t"
" dmb sy\n\t"
: "=&r"(old_value), "=&r"(flag), "=&r"(tmp), "+Qo"(value)
: "Ir"(increment_), "r"(&value)
: "cc");
#elif defined ZMQ_ATOMIC_COUNTER_MUTEX
sync.lock ();
old_value = value;
@ -118,6 +133,20 @@ namespace zmq
: "0" (oldval), "m" (*val)
: "cc", "memory");
return oldval != decrement;
#elif defined ZMQ_ATOMIC_COUNTER_ARM
integer_t old_value, flag, tmp;
__asm__ volatile (
" dmb sy\n\t"
"1: ldrex %0, [%5]\n\t"
" sub %2, %0, %4\n\t"
" strex %1, %2, [%5]\n\t"
" teq %1, #0\n\t"
" bne 1b\n\t"
" dmb sy\n\t"
: "=&r"(old_value), "=&r"(flag), "=&r"(tmp), "+Qo"(value)
: "Ir"(decrement), "r"(&value)
: "cc");
return old_value - decrement != 0;
#elif defined ZMQ_ATOMIC_COUNTER_MUTEX
sync.lock ();
value -= decrement;
@ -157,6 +186,9 @@ namespace zmq
#if defined ZMQ_ATOMIC_COUNTER_X86
#undef ZMQ_ATOMIC_COUNTER_X86
#endif
#if defined ZMQ_ATOMIC_COUNTER_ARM
#undef ZMQ_ATOMIC_COUNTER_ARM
#endif
#if defined ZMQ_ATOMIC_COUNTER_MUTEX
#undef ZMQ_ATOMIC_COUNTER_MUTEX
#endif

View File

@ -28,6 +28,8 @@
#define ZMQ_ATOMIC_PTR_MUTEX
#elif (defined __i386__ || defined __x86_64__) && defined __GNUC__
#define ZMQ_ATOMIC_PTR_X86
#elif defined __ARM_ARCH_7A__ && defined __GNUC__
#define ZMQ_ATOMIC_PTR_ARM
#elif defined ZMQ_HAVE_WINDOWS
#define ZMQ_ATOMIC_PTR_WINDOWS
#elif (defined ZMQ_HAVE_SOLARIS || defined ZMQ_HAVE_NETBSD)
@ -87,6 +89,20 @@ namespace zmq
: "=r" (old), "=m" (ptr)
: "m" (ptr), "0" (val_));
return old;
#elif defined ZMQ_ATOMIC_PTR_ARM
T* old;
unsigned int flag;
__asm__ volatile (
" dmb sy\n\t"
"1: ldrex %1, [%3]\n\t"
" strex %0, %4, [%3]\n\t"
" teq %0, #0\n\t"
" bne 1b\n\t"
" dmb sy\n\t"
: "=&r"(flag), "=&r"(old), "+Qo"(ptr)
: "r"(&ptr), "r"(val_)
: "cc");
return old;
#elif defined ZMQ_ATOMIC_PTR_MUTEX
sync.lock ();
T *old = (T*) ptr;
@ -117,6 +133,22 @@ namespace zmq
: "r" (val_), "m" (ptr), "0" (cmp_)
: "cc");
return old;
#elif defined ZMQ_ATOMIC_PTR_ARM
T *old;
unsigned int flag;
__asm__ volatile (
" dmb sy\n\t"
"1: ldrex %1, [%3]\n\t"
" mov %0, #0\n\t"
" teq %1, %4\n\t"
" strexeq %0, %5, [%3]\n\t"
" teq %0, #0\n\t"
" bne 1b\n\t"
" dmb sy\n\t"
: "=&r"(flag), "=&r"(old), "+Qo"(ptr)
: "r"(&ptr), "r"(cmp_), "r"(val_)
: "cc");
return old;
#elif defined ZMQ_ATOMIC_PTR_MUTEX
sync.lock ();
T *old = (T*) ptr;
@ -152,6 +184,9 @@ namespace zmq
#if defined ZMQ_ATOMIC_PTR_X86
#undef ZMQ_ATOMIC_PTR_X86
#endif
#if defined ZMQ_ATOMIC_PTR_ARM
#undef ZMQ_ATOMIC_PTR_ARM
#endif
#if defined ZMQ_ATOMIC_PTR_MUTEX
#undef ZMQ_ATOMIC_PTR_MUTEX
#endif