am 6cec7775: am 17216716: Merge "Mutex-free implementation of pthread_rwlock"

* commit '6cec77755ba563f3707f695c99b9d24bff0f1791':
  Mutex-free implementation of pthread_rwlock
This commit is contained in:
Calin Juravle 2014-05-22 10:20:19 +00:00 committed by Android Git Automerger
commit d75b6e2e47
3 changed files with 221 additions and 190 deletions

View File

@ -26,8 +26,11 @@
* SUCH DAMAGE. * SUCH DAMAGE.
*/ */
#include "pthread_internal.h"
#include <errno.h> #include <errno.h>
#include <sys/atomics.h>
#include "pthread_internal.h"
#include "private/bionic_futex.h"
/* Technical note: /* Technical note:
* *
@ -40,186 +43,169 @@
* Additionally: * Additionally:
* - trying to get the write-lock while there are any readers blocks * - trying to get the write-lock while there are any readers blocks
* - trying to get the read-lock while there is a writer blocks * - trying to get the read-lock while there is a writer blocks
* - a single thread can acquire the lock multiple times in the same mode * - a single thread can acquire the lock multiple times in read mode
* *
* - Posix states that behavior is undefined it a thread tries to acquire * - Posix states that behavior is undefined (may deadlock) if a thread tries
* the lock in two distinct modes (e.g. write after read, or read after write). * to acquire the lock
* - in write mode while already holding the lock (whether in read or write mode)
* - in read mode while already holding the lock in write mode.
* - This implementation will return EDEADLK in "write after write" and "read after
* write" cases and will deadlock in write after read case.
* *
* - This implementation tries to avoid writer starvation by making the readers * TODO: VERY CAREFULLY convert this to use C++11 atomics when possible. All volatile
* block as soon as there is a waiting writer on the lock. However, it cannot * members of pthread_rwlock_t should be converted to atomics<> and __atomic_cmpxchg
* completely eliminate it: each time the lock is unlocked, all waiting threads * should be changed to compare_exchange_strong accompanied by the proper ordering
* are woken and battle for it, which one gets it depends on the kernel scheduler * constraints (comments have been added with the intending ordering across the code).
* and is semi-random. *
* TODO: As it stands now, pendingReaders and pendingWriters could be merged into a
* a single waiters variable. Keeping them separate adds a bit of clarity and keeps
* the door open for a writer-biased implementation.
* *
*/ */
#define RWLOCKATTR_DEFAULT 0 #define RWLOCKATTR_DEFAULT 0
#define RWLOCKATTR_SHARED_MASK 0x0010 #define RWLOCKATTR_SHARED_MASK 0x0010
#define RWLOCK_IS_SHARED(rwlock) ((rwlock)->attr == PTHREAD_PROCESS_SHARED)
extern pthread_internal_t* __get_thread(void); extern pthread_internal_t* __get_thread(void);
int pthread_rwlockattr_init(pthread_rwlockattr_t *attr) int pthread_rwlockattr_init(pthread_rwlockattr_t *attr)
{ {
*attr = PTHREAD_PROCESS_PRIVATE; *attr = PTHREAD_PROCESS_PRIVATE;
return 0; return 0;
} }
int pthread_rwlockattr_destroy(pthread_rwlockattr_t *attr) int pthread_rwlockattr_destroy(pthread_rwlockattr_t *attr)
{ {
*attr = -1; *attr = -1;
return 0; return 0;
} }
int pthread_rwlockattr_setpshared(pthread_rwlockattr_t *attr, int pshared) int pthread_rwlockattr_setpshared(pthread_rwlockattr_t *attr, int pshared)
{ {
switch (pshared) { switch (pshared) {
case PTHREAD_PROCESS_PRIVATE: case PTHREAD_PROCESS_PRIVATE:
case PTHREAD_PROCESS_SHARED: case PTHREAD_PROCESS_SHARED:
*attr = pshared; *attr = pshared;
return 0; return 0;
default: default:
return EINVAL; return EINVAL;
} }
} }
int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* attr, int* pshared) { int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* attr, int* pshared) {
*pshared = *attr; *pshared = *attr;
return 0; return 0;
} }
int pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr) int pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr)
{ {
pthread_mutexattr_t* lock_attr = NULL; if (attr) {
pthread_condattr_t* cond_attr = NULL; switch (*attr) {
pthread_mutexattr_t lock_attr0; case PTHREAD_PROCESS_SHARED:
pthread_condattr_t cond_attr0; case PTHREAD_PROCESS_PRIVATE:
int ret; rwlock->attr= *attr;
break;
if (attr && *attr == PTHREAD_PROCESS_SHARED) { default:
lock_attr = &lock_attr0; return EINVAL;
pthread_mutexattr_init(lock_attr);
pthread_mutexattr_setpshared(lock_attr, PTHREAD_PROCESS_SHARED);
cond_attr = &cond_attr0;
pthread_condattr_init(cond_attr);
pthread_condattr_setpshared(cond_attr, PTHREAD_PROCESS_SHARED);
} }
}
ret = pthread_mutex_init(&rwlock->lock, lock_attr); rwlock->state = 0;
if (ret != 0) rwlock->pendingReaders = 0;
return ret; rwlock->pendingWriters = 0;
rwlock->writerThreadId = 0;
ret = pthread_cond_init(&rwlock->cond, cond_attr); return 0;
if (ret != 0) {
pthread_mutex_destroy(&rwlock->lock);
return ret;
}
rwlock->numLocks = 0;
rwlock->pendingReaders = 0;
rwlock->pendingWriters = 0;
rwlock->writerThreadId = 0;
return 0;
} }
int pthread_rwlock_destroy(pthread_rwlock_t *rwlock) int pthread_rwlock_destroy(pthread_rwlock_t *rwlock)
{ {
if (rwlock->numLocks > 0) if (rwlock->state != 0) {
return EBUSY; return EBUSY;
}
pthread_cond_destroy(&rwlock->cond); return 0;
pthread_mutex_destroy(&rwlock->lock);
return 0;
}
/* Returns TRUE iff we can acquire a read lock. */
static __inline__ int read_precondition(pthread_rwlock_t* rwlock, int tid)
{
/* We can't have the lock if any writer is waiting for it (writer bias).
* This tries to avoid starvation when there are multiple readers racing.
*/
if (rwlock->pendingWriters > 0)
return 0;
/* We can have the lock if there is no writer, or if we write-own it */
/* The second test avoids a self-dead lock in case of buggy code. */
if (rwlock->writerThreadId == 0 || rwlock->writerThreadId == tid)
return 1;
/* Otherwise, we can't have it */
return 0;
}
/* returns TRUE iff we can acquire a write lock. */
static __inline__ int write_precondition(pthread_rwlock_t* rwlock, int tid)
{
/* We can get the lock if nobody has it */
if (rwlock->numLocks == 0)
return 1;
/* Or if we already own it */
if (rwlock->writerThreadId == tid)
return 1;
/* Otherwise, not */
return 0;
}
/* This function is used to waken any waiting thread contending
* for the lock. One of them should be able to grab it after
* that.
*/
static void _pthread_rwlock_pulse(pthread_rwlock_t *rwlock)
{
if (rwlock->pendingReaders > 0 || rwlock->pendingWriters > 0)
pthread_cond_broadcast(&rwlock->cond);
} }
static int __pthread_rwlock_timedrdlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) { static int __pthread_rwlock_timedrdlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) {
int ret = 0; if (__predict_false(__get_thread()->tid == rwlock->writerThreadId)) {
return EDEADLK;
pthread_mutex_lock(&rwlock->lock);
int tid = __get_thread()->tid;
if (__predict_false(!read_precondition(rwlock, tid))) {
rwlock->pendingReaders += 1;
do {
ret = pthread_cond_timedwait(&rwlock->cond, &rwlock->lock, abs_timeout);
} while (ret == 0 && !read_precondition(rwlock, tid));
rwlock->pendingReaders -= 1;
if (ret != 0) {
goto EXIT;
}
} }
++rwlock->numLocks;
EXIT: bool done = false;
pthread_mutex_unlock(&rwlock->lock); do {
return ret; // This is actually a race read as there's nothing that guarantees the atomicity of integers
// reads / writes. However, in practice this "never" happens so until we switch to C++11 this
// should work fine. The same applies in the other places this idiom is used.
int32_t cur_state = rwlock->state; // C++11 relaxed atomic read
if (__predict_true(cur_state >= 0)) {
// Add as an extra reader.
done = __atomic_cmpxchg(cur_state, cur_state + 1, &rwlock->state) == 0; // C++11 memory_order_aquire
} else {
timespec ts;
timespec* tsp = NULL;
if (abs_timeout != NULL) {
if (__timespec_from_absolute(&ts, abs_timeout, CLOCK_REALTIME) < 0) {
return ETIMEDOUT;
}
tsp = &ts;
}
// Owner holds it in write mode, hang up.
// To avoid loosing wake ups the pendingReaders update and the state read should be
// sequentially consistent. (currently enforced by __atomic_inc which creates a full barrier)
__atomic_inc(&rwlock->pendingReaders); // C++11 memory_order_relaxed (if the futex_wait ensures the ordering)
if (__futex_wait_ex(&rwlock->state, RWLOCK_IS_SHARED(rwlock), cur_state, tsp) != 0) {
if (errno == ETIMEDOUT) {
__atomic_dec(&rwlock->pendingReaders); // C++11 memory_order_relaxed
return ETIMEDOUT;
}
}
__atomic_dec(&rwlock->pendingReaders); // C++11 memory_order_relaxed
}
} while (!done);
return 0;
} }
static int __pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) { static int __pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) {
int ret = 0;
pthread_mutex_lock(&rwlock->lock);
int tid = __get_thread()->tid; int tid = __get_thread()->tid;
if (__predict_false(!write_precondition(rwlock, tid))) { if (__predict_false(tid == rwlock->writerThreadId)) {
// If we can't read yet, wait until the rwlock is unlocked return EDEADLK;
// and try again. Increment pendingReaders to get the
// cond broadcast when that happens.
rwlock->pendingWriters += 1;
do {
ret = pthread_cond_timedwait(&rwlock->cond, &rwlock->lock, abs_timeout);
} while (ret == 0 && !write_precondition(rwlock, tid));
rwlock->pendingWriters -= 1;
if (ret != 0) {
goto EXIT;
}
} }
++rwlock->numLocks;
bool done = false;
do {
int32_t cur_state = rwlock->state;
if (__predict_true(cur_state == 0)) {
// Change state from 0 to -1.
done = __atomic_cmpxchg(0 /* cur_state */, -1 /* new state */, &rwlock->state) == 0; // C++11 memory_order_aquire
} else {
timespec ts;
timespec* tsp = NULL;
if (abs_timeout != NULL) {
if (__timespec_from_absolute(&ts, abs_timeout, CLOCK_REALTIME) < 0) {
return ETIMEDOUT;
}
tsp = &ts;
}
// Failed to acquire, hang up.
// To avoid loosing wake ups the pendingWriters update and the state read should be
// sequentially consistent. (currently enforced by __atomic_inc which creates a full barrier)
__atomic_inc(&rwlock->pendingWriters); // C++11 memory_order_relaxed (if the futex_wait ensures the ordering)
if (__futex_wait_ex(&rwlock->state, RWLOCK_IS_SHARED(rwlock), cur_state, tsp) != 0) {
if (errno == ETIMEDOUT) {
__atomic_dec(&rwlock->pendingWriters); // C++11 memory_order_relaxed
return ETIMEDOUT;
}
}
__atomic_dec(&rwlock->pendingWriters); // C++11 memory_order_relaxed
}
} while (!done);
rwlock->writerThreadId = tid; rwlock->writerThreadId = tid;
EXIT: return 0;
pthread_mutex_unlock(&rwlock->lock);
return ret;
} }
int pthread_rwlock_rdlock(pthread_rwlock_t* rwlock) { int pthread_rwlock_rdlock(pthread_rwlock_t* rwlock) {
@ -228,16 +214,15 @@ int pthread_rwlock_rdlock(pthread_rwlock_t* rwlock) {
int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock) int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
{ {
int ret = 0; int32_t cur_state = rwlock->state;
if (cur_state >= 0) {
pthread_mutex_lock(&rwlock->lock); if(__atomic_cmpxchg(cur_state, cur_state + 1, &rwlock->state) != 0) { // C++11 memory_order_acquire
if (__predict_false(!read_precondition(rwlock, __get_thread()->tid))) return EBUSY;
ret = EBUSY; }
else } else {
++rwlock->numLocks; return EBUSY;
pthread_mutex_unlock(&rwlock->lock); }
return 0;
return ret;
} }
int pthread_rwlock_timedrdlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) { int pthread_rwlock_timedrdlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) {
@ -250,18 +235,18 @@ int pthread_rwlock_wrlock(pthread_rwlock_t* rwlock) {
int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock) int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
{ {
int ret = 0; int tid = __get_thread()->tid;
int32_t cur_state = rwlock->state;
pthread_mutex_lock(&rwlock->lock); if (cur_state == 0) {
int tid = __get_thread()->tid; if(__atomic_cmpxchg(0, -1, &rwlock->state) != 0) { // C++11 memory_order_acquire
if (__predict_false(!write_precondition(rwlock, tid))) { return EBUSY;
ret = EBUSY;
} else {
++rwlock->numLocks;
rwlock->writerThreadId = tid;
} }
pthread_mutex_unlock(&rwlock->lock); } else {
return ret; return EBUSY;
}
rwlock->writerThreadId = tid;
return 0;
} }
int pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) { int pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) {
@ -270,35 +255,43 @@ int pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock, const timespec* abs_tim
int pthread_rwlock_unlock(pthread_rwlock_t *rwlock) int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
{ {
int ret = 0; int tid = __get_thread()->tid;
bool done = false;
pthread_mutex_lock(&rwlock->lock); do {
int32_t cur_state = rwlock->state;
/* The lock must be held */ if (cur_state == 0) {
if (rwlock->numLocks == 0) { return EPERM;
ret = EPERM;
goto EXIT;
} }
if (cur_state == -1) {
if (rwlock->writerThreadId != tid) {
return EPERM;
}
// We're no longer the owner.
rwlock->writerThreadId = 0;
// Change state from -1 to 0.
// We use __atomic_cmpxchg to achieve sequential consistency of the state store and
// the following pendingX loads. A simple store with memory_order_release semantics
// is not enough to guarantee that the pendingX loads are not reordered before the
// store (which may lead to a lost wakeup).
__atomic_cmpxchg(-1 /* cur_state*/, 0 /* new state */, &rwlock->state); // C++11 maybe memory_order_seq_cst?
/* If it has only readers, writerThreadId is 0 */ // Wake any waiters.
if (rwlock->writerThreadId == 0) { if (__predict_false(rwlock->pendingReaders > 0 || rwlock->pendingWriters > 0)) {
if (--rwlock->numLocks == 0) __futex_wake_ex(&rwlock->state, RWLOCK_IS_SHARED(rwlock), INT_MAX);
_pthread_rwlock_pulse(rwlock); }
} done = true;
/* Otherwise, it has only a single writer, which } else { // cur_state > 0
* must be ourselves. // Reduce state by 1.
*/ // See the above comment on why we need __atomic_cmpxchg.
else { done = __atomic_cmpxchg(cur_state, cur_state - 1, &rwlock->state) == 0; // C++11 maybe memory_order_seq_cst?
if (rwlock->writerThreadId != __get_thread()->tid) { if (done && (cur_state - 1) == 0) {
ret = EPERM; // There are no more readers, wake any waiters.
goto EXIT; if (__predict_false(rwlock->pendingReaders > 0 || rwlock->pendingWriters > 0)) {
} __futex_wake_ex(&rwlock->state, RWLOCK_IS_SHARED(rwlock), INT_MAX);
if (--rwlock->numLocks == 0) {
rwlock->writerThreadId = 0;
_pthread_rwlock_pulse(rwlock);
} }
}
} }
EXIT: } while (!done);
pthread_mutex_unlock(&rwlock->lock);
return ret; return 0;
} }

View File

@ -94,16 +94,17 @@ typedef long pthread_condattr_t;
typedef long pthread_rwlockattr_t; typedef long pthread_rwlockattr_t;
typedef struct { typedef struct {
pthread_mutex_t lock; pthread_mutex_t __unused_lock;
pthread_cond_t cond; pthread_cond_t __unused_cond;
int numLocks; volatile int32_t state; // 0=unlock, -1=writer lock, +n=reader lock
int writerThreadId; volatile int32_t writerThreadId;
int pendingReaders; volatile int32_t pendingReaders;
int pendingWriters; volatile int32_t pendingWriters;
void* __reserved[4]; int32_t attr;
void* __reserved[3];
} pthread_rwlock_t; } pthread_rwlock_t;
#define PTHREAD_RWLOCK_INITIALIZER { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0, 0, 0, { NULL, NULL, NULL, NULL } } #define PTHREAD_RWLOCK_INITIALIZER { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0, 0, 0, 0, { NULL, NULL, NULL } }
typedef int pthread_key_t; typedef int pthread_key_t;
typedef long pthread_t; typedef long pthread_t;

View File

@ -551,12 +551,49 @@ TEST(pthread, pthread_rwlock_smoke) {
pthread_rwlock_t l; pthread_rwlock_t l;
ASSERT_EQ(0, pthread_rwlock_init(&l, NULL)); ASSERT_EQ(0, pthread_rwlock_init(&l, NULL));
// Single read lock
ASSERT_EQ(0, pthread_rwlock_rdlock(&l)); ASSERT_EQ(0, pthread_rwlock_rdlock(&l));
ASSERT_EQ(0, pthread_rwlock_unlock(&l)); ASSERT_EQ(0, pthread_rwlock_unlock(&l));
// Multiple read lock
ASSERT_EQ(0, pthread_rwlock_rdlock(&l));
ASSERT_EQ(0, pthread_rwlock_rdlock(&l));
ASSERT_EQ(0, pthread_rwlock_unlock(&l));
ASSERT_EQ(0, pthread_rwlock_unlock(&l));
// Write lock
ASSERT_EQ(0, pthread_rwlock_wrlock(&l));
ASSERT_EQ(0, pthread_rwlock_unlock(&l));
// Try writer lock
ASSERT_EQ(0, pthread_rwlock_trywrlock(&l));
ASSERT_EQ(EBUSY, pthread_rwlock_trywrlock(&l));
ASSERT_EQ(EBUSY, pthread_rwlock_tryrdlock(&l));
ASSERT_EQ(0, pthread_rwlock_unlock(&l));
// Try reader lock
ASSERT_EQ(0, pthread_rwlock_tryrdlock(&l));
ASSERT_EQ(0, pthread_rwlock_tryrdlock(&l));
ASSERT_EQ(EBUSY, pthread_rwlock_trywrlock(&l));
ASSERT_EQ(0, pthread_rwlock_unlock(&l));
ASSERT_EQ(0, pthread_rwlock_unlock(&l));
// Try writer lock after unlock
ASSERT_EQ(0, pthread_rwlock_wrlock(&l)); ASSERT_EQ(0, pthread_rwlock_wrlock(&l));
ASSERT_EQ(0, pthread_rwlock_unlock(&l)); ASSERT_EQ(0, pthread_rwlock_unlock(&l));
#ifdef __BIONIC__
// EDEADLK in "read after write"
ASSERT_EQ(0, pthread_rwlock_wrlock(&l));
ASSERT_EQ(EDEADLK, pthread_rwlock_rdlock(&l));
ASSERT_EQ(0, pthread_rwlock_unlock(&l));
// EDEADLK in "write after write"
ASSERT_EQ(0, pthread_rwlock_wrlock(&l));
ASSERT_EQ(EDEADLK, pthread_rwlock_wrlock(&l));
ASSERT_EQ(0, pthread_rwlock_unlock(&l));
#endif
ASSERT_EQ(0, pthread_rwlock_destroy(&l)); ASSERT_EQ(0, pthread_rwlock_destroy(&l));
} }