am da2317ed: am cdc7ad11: Merge "Use mmap to create the pthread_internal_t"

* commit 'da2317edb36acdaa6a7bc49cef8deba2a42bfb15':
  Use mmap to create the pthread_internal_t
This commit is contained in:
Yabin Cui 2014-12-20 00:19:25 +00:00 committed by Android Git Automerger
commit a15bd72cf0
8 changed files with 171 additions and 88 deletions

View File

@ -47,6 +47,21 @@ static void BM_pthread_getspecific(int iters) {
} }
BENCHMARK(BM_pthread_getspecific); BENCHMARK(BM_pthread_getspecific);
// Benchmarks pthread_setspecific() against a key that already exists.
// The key is created and deleted while the timer is stopped, so only the
// repeated stores of NULL are measured.
static void BM_pthread_setspecific(int iters) {
  StopBenchmarkTiming();
  pthread_key_t tls_key;
  pthread_key_create(&tls_key, NULL);
  StartBenchmarkTiming();

  for (int remaining = iters; remaining > 0; --remaining) {
    pthread_setspecific(tls_key, NULL);
  }

  StopBenchmarkTiming();
  pthread_key_delete(tls_key);
}
BENCHMARK(BM_pthread_setspecific);
static void DummyPthreadOnceInitFunction() { static void DummyPthreadOnceInitFunction() {
} }
@ -137,3 +152,80 @@ static void BM_pthread_rw_lock_write(int iters) {
pthread_rwlock_destroy(&lock); pthread_rwlock_destroy(&lock);
} }
BENCHMARK(BM_pthread_rw_lock_write); BENCHMARK(BM_pthread_rw_lock_write);
// Thread start routine that does no work: it ignores its argument and
// returns NULL right away. BM_pthread_create passes it to pthread_create().
static void* IdleThread(void*) {
return NULL;
}
// Benchmarks the pthread_create() call itself. The timer runs only around
// the create call; joining the (idle) thread happens with the timer stopped.
static void BM_pthread_create(int iters) {
  StopBenchmarkTiming();
  pthread_t worker;

  for (int remaining = iters; remaining > 0; --remaining) {
    StartBenchmarkTiming();
    pthread_create(&worker, NULL, IdleThread, NULL);
    StopBenchmarkTiming();

    // Reap the thread outside the timed region.
    pthread_join(worker, NULL);
  }
}
BENCHMARK(BM_pthread_create);
// Thread start routine for BM_pthread_create_and_run: stops the benchmark
// timer (which the benchmark started just before pthread_create()), then
// returns NULL. The argument is ignored.
static void* RunThread(void*) {
StopBenchmarkTiming();
return NULL;
}
static void BM_pthread_create_and_run(int iters) {
StopBenchmarkTiming();
pthread_t thread;
for (int i = 0; i < iters; ++i) {
StartBenchmarkTiming();
pthread_create(&thread, NULL, RunThread, NULL);
pthread_join(thread, NULL);
}
}
BENCHMARK(BM_pthread_create_and_run);
// Thread start routine for BM_pthread_exit_and_join: starts the benchmark
// timer and then terminates the thread via pthread_exit(NULL). There is no
// return statement because pthread_exit() does not return to its caller.
// The argument is ignored.
static void* ExitThread(void*) {
StartBenchmarkTiming();
pthread_exit(NULL);
}
static void BM_pthread_exit_and_join(int iters) {
StopBenchmarkTiming();
pthread_t thread;
for (int i = 0; i < iters; ++i) {
pthread_create(&thread, NULL, ExitThread, NULL);
pthread_join(thread, NULL);
StopBenchmarkTiming();
}
}
BENCHMARK(BM_pthread_exit_and_join);
// Benchmarks pthread_key_create(). Each iteration times only the create
// call; the key is deleted again with the timer stopped so the next
// iteration can create a fresh key.
static void BM_pthread_key_create(int iters) {
  StopBenchmarkTiming();
  pthread_key_t tls_key;

  for (int remaining = iters; remaining > 0; --remaining) {
    StartBenchmarkTiming();
    pthread_key_create(&tls_key, NULL);
    StopBenchmarkTiming();

    // Untimed cleanup.
    pthread_key_delete(tls_key);
  }
}
BENCHMARK(BM_pthread_key_create);
// Benchmarks pthread_key_delete(). Each iteration creates a key with the
// timer stopped, then times only the delete call.
static void BM_pthread_key_delete(int iters) {
  StopBenchmarkTiming();
  pthread_key_t tls_key;

  for (int remaining = iters; remaining > 0; --remaining) {
    // Untimed setup.
    pthread_key_create(&tls_key, NULL);

    StartBenchmarkTiming();
    pthread_key_delete(tls_key);
    StopBenchmarkTiming();
  }
}
BENCHMARK(BM_pthread_key_delete);

View File

@ -74,9 +74,7 @@ uintptr_t __stack_chk_guard = 0;
void __libc_init_tls(KernelArgumentBlock& args) { void __libc_init_tls(KernelArgumentBlock& args) {
__libc_auxv = args.auxv; __libc_auxv = args.auxv;
static void* tls[BIONIC_TLS_SLOTS];
static pthread_internal_t main_thread; static pthread_internal_t main_thread;
main_thread.tls = tls;
// Tell the kernel to clear our tid field when we exit, so we're like any other pthread. // Tell the kernel to clear our tid field when we exit, so we're like any other pthread.
// As a side-effect, this tells us our pid (which is the same as the main thread's tid). // As a side-effect, this tells us our pid (which is the same as the main thread's tid).
@ -96,7 +94,7 @@ void __libc_init_tls(KernelArgumentBlock& args) {
__init_thread(&main_thread, false); __init_thread(&main_thread, false);
__init_tls(&main_thread); __init_tls(&main_thread);
__set_tls(main_thread.tls); __set_tls(main_thread.tls);
tls[TLS_SLOT_BIONIC_PREINIT] = &args; main_thread.tls[TLS_SLOT_BIONIC_PREINIT] = &args;
__init_alternate_signal_stack(&main_thread); __init_alternate_signal_stack(&main_thread);
} }

View File

@ -35,6 +35,7 @@
#include "pthread_internal.h" #include "pthread_internal.h"
#include "private/bionic_macros.h" #include "private/bionic_macros.h"
#include "private/bionic_prctl.h"
#include "private/bionic_ssp.h" #include "private/bionic_ssp.h"
#include "private/bionic_tls.h" #include "private/bionic_tls.h"
#include "private/libc_logging.h" #include "private/libc_logging.h"
@ -72,6 +73,10 @@ void __init_alternate_signal_stack(pthread_internal_t* thread) {
ss.ss_flags = 0; ss.ss_flags = 0;
sigaltstack(&ss, NULL); sigaltstack(&ss, NULL);
thread->alternate_signal_stack = ss.ss_sp; thread->alternate_signal_stack = ss.ss_sp;
// We can only use a statically allocated const string for the mapped region name, as the Android
// kernel uses the string pointer directly when dumping /proc/pid/maps.
prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ss.ss_sp, ss.ss_size, "thread signal stack");
} }
} }
@ -101,31 +106,64 @@ int __init_thread(pthread_internal_t* thread, bool add_to_thread_list) {
return error; return error;
} }
static void* __create_thread_stack(pthread_internal_t* thread) { static void* __create_thread_stack(const pthread_attr_t& attr) {
// Create a new private anonymous map. // Create a new private anonymous map.
int prot = PROT_READ | PROT_WRITE; int prot = PROT_READ | PROT_WRITE;
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
void* stack = mmap(NULL, thread->attr.stack_size, prot, flags, -1, 0); void* stack = mmap(NULL, attr.stack_size, prot, flags, -1, 0);
if (stack == MAP_FAILED) { if (stack == MAP_FAILED) {
__libc_format_log(ANDROID_LOG_WARN, __libc_format_log(ANDROID_LOG_WARN,
"libc", "libc",
"pthread_create failed: couldn't allocate %zd-byte stack: %s", "pthread_create failed: couldn't allocate %zd-byte stack: %s",
thread->attr.stack_size, strerror(errno)); attr.stack_size, strerror(errno));
return NULL; return NULL;
} }
// Set the guard region at the end of the stack to PROT_NONE. // Set the guard region at the end of the stack to PROT_NONE.
if (mprotect(stack, thread->attr.guard_size, PROT_NONE) == -1) { if (mprotect(stack, attr.guard_size, PROT_NONE) == -1) {
__libc_format_log(ANDROID_LOG_WARN, "libc", __libc_format_log(ANDROID_LOG_WARN, "libc",
"pthread_create failed: couldn't mprotect PROT_NONE %zd-byte stack guard region: %s", "pthread_create failed: couldn't mprotect PROT_NONE %zd-byte stack guard region: %s",
thread->attr.guard_size, strerror(errno)); attr.guard_size, strerror(errno));
munmap(stack, thread->attr.stack_size); munmap(stack, attr.stack_size);
return NULL; return NULL;
} }
return stack; return stack;
} }
static int __allocate_thread(pthread_attr_t* attr, pthread_internal_t** threadp, void** child_stack) {
if (attr->stack_base == NULL) {
// The caller didn't provide a stack, so allocate one.
// Make sure the stack size and guard size are multiples of PAGE_SIZE.
attr->stack_size = BIONIC_ALIGN(attr->stack_size, PAGE_SIZE);
attr->guard_size = BIONIC_ALIGN(attr->guard_size, PAGE_SIZE);
attr->stack_base = __create_thread_stack(*attr);
if (attr->stack_base == NULL) {
return EAGAIN;
}
} else {
// The caller did provide a stack, so remember we're not supposed to free it.
attr->flags |= PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK;
}
// Thread stack is used for two sections:
// pthread_internal_t.
// regular stack, from top to down.
uint8_t* stack_top = reinterpret_cast<uint8_t*>(attr->stack_base) + attr->stack_size;
stack_top -= sizeof(pthread_internal_t);
pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
// No need to check stack_top alignment. The size of pthread_internal_t is 16-bytes aligned,
// and user allocated stack is guaranteed by pthread_attr_setstack.
thread->attr = *attr;
__init_tls(thread);
*threadp = thread;
*child_stack = stack_top;
return 0;
}
static int __pthread_start(void* arg) { static int __pthread_start(void* arg) {
pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg); pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);
@ -158,43 +196,21 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
// Inform the rest of the C library that at least one thread was created. // Inform the rest of the C library that at least one thread was created.
__isthreaded = 1; __isthreaded = 1;
pthread_internal_t* thread = __create_thread_struct(); pthread_attr_t thread_attr;
if (thread == NULL) {
return EAGAIN;
}
if (attr == NULL) { if (attr == NULL) {
pthread_attr_init(&thread->attr); pthread_attr_init(&thread_attr);
} else { } else {
thread->attr = *attr; thread_attr = *attr;
attr = NULL; // Prevent misuse below. attr = NULL; // Prevent misuse below.
} }
// Make sure the stack size and guard size are multiples of PAGE_SIZE. pthread_internal_t* thread = NULL;
thread->attr.stack_size = BIONIC_ALIGN(thread->attr.stack_size, PAGE_SIZE); void* child_stack = NULL;
thread->attr.guard_size = BIONIC_ALIGN(thread->attr.guard_size, PAGE_SIZE); int result = __allocate_thread(&thread_attr, &thread, &child_stack);
if (result != 0) {
if (thread->attr.stack_base == NULL) { return result;
// The caller didn't provide a stack, so allocate one.
thread->attr.stack_base = __create_thread_stack(thread);
if (thread->attr.stack_base == NULL) {
__free_thread_struct(thread);
return EAGAIN;
}
} else {
// The caller did provide a stack, so remember we're not supposed to free it.
thread->attr.flags |= PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK;
} }
// Make room for the TLS area.
// The child stack is the same address, just growing in the opposite direction.
// At offsets >= 0, we have the TLS slots.
// At offsets < 0, we have the child stack.
thread->tls = reinterpret_cast<void**>(reinterpret_cast<uint8_t*>(thread->attr.stack_base) +
thread->attr.stack_size - BIONIC_ALIGN(BIONIC_TLS_SLOTS * sizeof(void*), 16));
void* child_stack = thread->tls;
__init_tls(thread);
// Create a mutex for the thread in TLS to wait on once it starts so we can keep // Create a mutex for the thread in TLS to wait on once it starts so we can keep
// it from doing anything until after we notify the debugger about it // it from doing anything until after we notify the debugger about it
// //
@ -211,7 +227,7 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID; CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
void* tls = thread->tls; void* tls = reinterpret_cast<void*>(thread->tls);
#if defined(__i386__) #if defined(__i386__)
// On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than // On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than
// a pointer to the TLS itself. // a pointer to the TLS itself.
@ -229,7 +245,6 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
if (!thread->user_allocated_stack()) { if (!thread->user_allocated_stack()) {
munmap(thread->attr.stack_base, thread->attr.stack_size); munmap(thread->attr.stack_base, thread->attr.stack_size);
} }
__free_thread_struct(thread);
__libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s", strerror(errno)); __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s", strerror(errno));
return clone_errno; return clone_errno;
} }

View File

@ -46,7 +46,7 @@ int pthread_detach(pthread_t t) {
if (thread->tid == 0) { if (thread->tid == 0) {
// Already exited; clean up. // Already exited; clean up.
_pthread_internal_remove_locked(thread.get()); _pthread_internal_remove_locked(thread.get(), true);
return 0; return 0;
} }

View File

@ -90,7 +90,7 @@ void pthread_exit(void* return_value) {
// Keep track of what we need to know about the stack before we lose the pthread_internal_t. // Keep track of what we need to know about the stack before we lose the pthread_internal_t.
void* stack_base = thread->attr.stack_base; void* stack_base = thread->attr.stack_base;
size_t stack_size = thread->attr.stack_size; size_t stack_size = thread->attr.stack_size;
bool user_allocated_stack = thread->user_allocated_stack(); bool free_stack = false;
pthread_mutex_lock(&g_thread_list_lock); pthread_mutex_lock(&g_thread_list_lock);
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) { if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) {
@ -98,24 +98,18 @@ void pthread_exit(void* return_value) {
// First make sure that the kernel does not try to clear the tid field // First make sure that the kernel does not try to clear the tid field
// because we'll have freed the memory before the thread actually exits. // because we'll have freed the memory before the thread actually exits.
__set_tid_address(NULL); __set_tid_address(NULL);
_pthread_internal_remove_locked(thread);
} else { // pthread_internal_t is freed below with stack, not here.
// Make sure that the pthread_internal_t doesn't have stale pointers to a stack that _pthread_internal_remove_locked(thread, false);
// will be unmapped after the exit call below. if (!thread->user_allocated_stack()) {
if (!user_allocated_stack) { free_stack = true;
thread->attr.stack_base = NULL;
thread->attr.stack_size = 0;
thread->tls = NULL;
} }
// pthread_join is responsible for destroying the pthread_internal_t for non-detached threads.
// The kernel will futex_wake on the pthread_internal_t::tid field to wake pthread_join.
} }
pthread_mutex_unlock(&g_thread_list_lock); pthread_mutex_unlock(&g_thread_list_lock);
if (user_allocated_stack) { // Detached threads exit with stack teardown, and everything deallocated here.
// Cleaning up this thread's stack is the creator's responsibility, not ours. // Threads that can be joined exit but leave their stacks for the pthread_join caller to clean up.
__exit(0); if (free_stack) {
} else {
// We need to munmap the stack we're running on before calling exit. // We need to munmap the stack we're running on before calling exit.
// That's not something we can do in C. // That's not something we can do in C.
@ -126,5 +120,7 @@ void pthread_exit(void* return_value) {
sigprocmask(SIG_SETMASK, &mask, NULL); sigprocmask(SIG_SETMASK, &mask, NULL);
_exit_with_stack_teardown(stack_base, stack_size); _exit_with_stack_teardown(stack_base, stack_size);
} else {
__exit(0);
} }
} }

View File

@ -30,6 +30,8 @@
#include <pthread.h> #include <pthread.h>
#include "private/bionic_tls.h"
/* Has the thread been detached by a pthread_join or pthread_detach call? */ /* Has the thread been detached by a pthread_join or pthread_detach call? */
#define PTHREAD_ATTR_FLAG_DETACHED 0x00000001 #define PTHREAD_ATTR_FLAG_DETACHED 0x00000001
@ -72,8 +74,6 @@ struct pthread_internal_t {
return (attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) != 0; return (attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) != 0;
} }
void** tls;
pthread_attr_t attr; pthread_attr_t attr;
__pthread_cleanup_t* cleanup_stack; __pthread_cleanup_t* cleanup_stack;
@ -86,16 +86,16 @@ struct pthread_internal_t {
pthread_mutex_t startup_handshake_mutex; pthread_mutex_t startup_handshake_mutex;
void* tls[BIONIC_TLS_SLOTS];
/* /*
* The dynamic linker implements dlerror(3), which makes it hard for us to implement this * The dynamic linker implements dlerror(3), which makes it hard for us to implement this
* per-thread buffer by simply using malloc(3) and free(3). * per-thread buffer by simply using malloc(3) and free(3).
*/ */
#define __BIONIC_DLERROR_BUFFER_SIZE 512 #define __BIONIC_DLERROR_BUFFER_SIZE 512
char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE]; char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
}; } __attribute__((aligned(16))); // Align it as thread stack top below it should be aligned.
__LIBC_HIDDEN__ pthread_internal_t* __create_thread_struct();
__LIBC_HIDDEN__ void __free_thread_struct(pthread_internal_t*);
__LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread, bool add_to_thread_list); __LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread, bool add_to_thread_list);
__LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread); __LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread);
__LIBC_HIDDEN__ void __init_alternate_signal_stack(pthread_internal_t*); __LIBC_HIDDEN__ void __init_alternate_signal_stack(pthread_internal_t*);
@ -105,7 +105,7 @@ __LIBC_HIDDEN__ void _pthread_internal_add(pthread_internal_t* thread);
extern "C" __LIBC64_HIDDEN__ pthread_internal_t* __get_thread(void); extern "C" __LIBC64_HIDDEN__ pthread_internal_t* __get_thread(void);
__LIBC_HIDDEN__ void pthread_key_clean_all(void); __LIBC_HIDDEN__ void pthread_key_clean_all(void);
__LIBC_HIDDEN__ void _pthread_internal_remove_locked(pthread_internal_t* thread); __LIBC_HIDDEN__ void _pthread_internal_remove_locked(pthread_internal_t* thread, bool free_thread);
/* /*
* Traditionally we gave threads a 1MiB stack. When we started * Traditionally we gave threads a 1MiB stack. When we started

View File

@ -41,26 +41,7 @@
pthread_internal_t* g_thread_list = NULL; pthread_internal_t* g_thread_list = NULL;
pthread_mutex_t g_thread_list_lock = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t g_thread_list_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_internal_t* __create_thread_struct() { void _pthread_internal_remove_locked(pthread_internal_t* thread, bool free_thread) {
void* result = mmap(NULL, sizeof(pthread_internal_t), PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
if (result == MAP_FAILED) {
__libc_format_log(ANDROID_LOG_WARN, "libc",
"__create_thread_struct() failed: %s", strerror(errno));
return NULL;
}
return reinterpret_cast<pthread_internal_t*>(result);
}
void __free_thread_struct(pthread_internal_t* thread) {
int result = munmap(thread, sizeof(pthread_internal_t));
if (result != 0) {
__libc_format_log(ANDROID_LOG_WARN, "libc",
"__free_thread_struct() failed: %s", strerror(errno));
}
}
void _pthread_internal_remove_locked(pthread_internal_t* thread) {
if (thread->next != NULL) { if (thread->next != NULL) {
thread->next->prev = thread->prev; thread->next->prev = thread->prev;
} }
@ -70,10 +51,11 @@ void _pthread_internal_remove_locked(pthread_internal_t* thread) {
g_thread_list = thread->next; g_thread_list = thread->next;
} }
// The main thread is not heap-allocated. See __libc_init_tls for the declaration, // For threads using user allocated stack (including the main thread), the pthread_internal_t
// and __libc_init_common for the point where it's added to the thread list. // can't be freed since it is on the stack.
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_MAIN_THREAD) == 0) { if (free_thread && !(thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK)) {
__free_thread_struct(thread); // Use one munmap to free the whole thread stack, including pthread_internal_t.
munmap(thread->attr.stack_base, thread->attr.stack_size);
} }
} }

View File

@ -74,6 +74,6 @@ int pthread_join(pthread_t t, void** return_value) {
*return_value = thread->return_value; *return_value = thread->return_value;
} }
_pthread_internal_remove_locked(thread.get()); _pthread_internal_remove_locked(thread.get(), true);
return 0; return 0;
} }