From 135fdab0eced035e6f2aaaa306dae438165b4303 Mon Sep 17 00:00:00 2001 From: Patrick Trantham Date: Fri, 13 Apr 2012 10:48:15 -0500 Subject: [PATCH] Implement atomic operations for armv7a This commit implements atomic operations for the armv7a architecture using gcc inline assembly. This offers higher performance compared to pthread mutexes. Tested on an am3517 evm, clocked at 600MHz: ./inproc_thr 200 1000000 ------------------------ 53-60K messages / sec, pthread mutexes 73-90K messages / sec, assembly atomic ops ./inproc_lat 200 1000000 ------------------------ average latency: 42.234 [us], pthread mutexes average latency: 35.496 [us], assembly atomic ops --- src/atomic_counter.hpp | 32 ++++++++++++++++++++++++++++++++ src/atomic_ptr.hpp | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/src/atomic_counter.hpp b/src/atomic_counter.hpp index a0a67bf0..65468bb5 100644 --- a/src/atomic_counter.hpp +++ b/src/atomic_counter.hpp @@ -29,6 +29,8 @@ #define ZMQ_ATOMIC_COUNTER_MUTEX #elif (defined __i386__ || defined __x86_64__) && defined __GNUC__ #define ZMQ_ATOMIC_COUNTER_X86 +#elif defined __ARM_ARCH_7A__ && defined __GNUC__ +#define ZMQ_ATOMIC_COUNTER_ARM #elif defined ZMQ_HAVE_WINDOWS #define ZMQ_ATOMIC_COUNTER_WINDOWS #elif (defined ZMQ_HAVE_SOLARIS || defined ZMQ_HAVE_NETBSD) @@ -88,6 +90,19 @@ namespace zmq : "=r" (old_value), "=m" (value) : "0" (increment_), "m" (value) : "cc", "memory"); +#elif defined ZMQ_ATOMIC_COUNTER_ARM + integer_t flag, tmp; + __asm__ volatile ( + " dmb sy\n\t" + "1: ldrex %0, [%5]\n\t" + " add %2, %0, %4\n\t" + " strex %1, %2, [%5]\n\t" + " teq %1, #0\n\t" + " bne 1b\n\t" + " dmb sy\n\t" + : "=&r"(old_value), "=&r"(flag), "=&r"(tmp), "+Qo"(value) + : "Ir"(increment_), "r"(&value) + : "cc"); #elif defined ZMQ_ATOMIC_COUNTER_MUTEX sync.lock (); old_value = value; @@ -118,6 +133,20 @@ namespace zmq : "0" (oldval), "m" (*val) : "cc", "memory"); return oldval != decrement; +#elif defined ZMQ_ATOMIC_COUNTER_ARM + integer_t old_value, flag, tmp; + __asm__ volatile ( + " dmb sy\n\t" + "1: ldrex %0, [%5]\n\t" + " sub %2, %0, %4\n\t" + " strex %1, %2, [%5]\n\t" + " teq %1, #0\n\t" + " bne 1b\n\t" + " dmb sy\n\t" + : "=&r"(old_value), "=&r"(flag), "=&r"(tmp), "+Qo"(value) + : "Ir"(decrement), "r"(&value) + : "cc"); + return old_value - decrement != 0; #elif defined ZMQ_ATOMIC_COUNTER_MUTEX sync.lock (); value -= decrement; @@ -157,6 +186,9 @@ namespace zmq #if defined ZMQ_ATOMIC_COUNTER_X86 #undef ZMQ_ATOMIC_COUNTER_X86 #endif +#if defined ZMQ_ATOMIC_COUNTER_ARM +#undef ZMQ_ATOMIC_COUNTER_ARM +#endif #if defined ZMQ_ATOMIC_COUNTER_MUTEX #undef ZMQ_ATOMIC_COUNTER_MUTEX #endif diff --git a/src/atomic_ptr.hpp b/src/atomic_ptr.hpp index c59ab810..e6d358d3 100644 --- a/src/atomic_ptr.hpp +++ b/src/atomic_ptr.hpp @@ -28,6 +28,8 @@ #define ZMQ_ATOMIC_PTR_MUTEX #elif (defined __i386__ || defined __x86_64__) && defined __GNUC__ #define ZMQ_ATOMIC_PTR_X86 +#elif defined __ARM_ARCH_7A__ && defined __GNUC__ +#define ZMQ_ATOMIC_PTR_ARM #elif defined ZMQ_HAVE_WINDOWS #define ZMQ_ATOMIC_PTR_WINDOWS #elif (defined ZMQ_HAVE_SOLARIS || defined ZMQ_HAVE_NETBSD) @@ -87,6 +89,20 @@ namespace zmq : "=r" (old), "=m" (ptr) : "m" (ptr), "0" (val_)); return old; +#elif defined ZMQ_ATOMIC_PTR_ARM + T* old; + unsigned int flag; + __asm__ volatile ( + " dmb sy\n\t" + "1: ldrex %1, [%3]\n\t" + " strex %0, %4, [%3]\n\t" + " teq %0, #0\n\t" + " bne 1b\n\t" + " dmb sy\n\t" + : "=&r"(flag), "=&r"(old), "+Qo"(ptr) + : "r"(&ptr), "r"(val_) + : "cc"); + return old; #elif defined ZMQ_ATOMIC_PTR_MUTEX sync.lock (); T *old = (T*) ptr; @@ -117,6 +133,22 @@ namespace zmq : "r" (val_), "m" (ptr), "0" (cmp_) : "cc"); return old; +#elif defined ZMQ_ATOMIC_PTR_ARM + T *old; + unsigned int flag; + __asm__ volatile ( + " dmb sy\n\t" + "1: ldrex %1, [%3]\n\t" + " mov %0, #0\n\t" + " teq %1, %4\n\t" + " strexeq %0, %5, [%3]\n\t" + " teq %0, #0\n\t" + " bne 1b\n\t" + " dmb sy\n\t" + : "=&r"(flag), "=&r"(old), "+Qo"(ptr) + : "r"(&ptr), "r"(cmp_), "r"(val_) + : "cc"); + return old; #elif defined ZMQ_ATOMIC_PTR_MUTEX sync.lock (); T *old = (T*) ptr; @@ -152,6 +184,9 @@ namespace zmq #if defined ZMQ_ATOMIC_PTR_X86 #undef ZMQ_ATOMIC_PTR_X86 #endif +#if defined ZMQ_ATOMIC_PTR_ARM +#undef ZMQ_ATOMIC_PTR_ARM +#endif #if defined ZMQ_ATOMIC_PTR_MUTEX #undef ZMQ_ATOMIC_PTR_MUTEX #endif