Fix pthread_join.

Let the kernel keep pthread_internal_t::tid updated, including
across forks and for the main thread. This then lets us fix
pthread_join to only return after the thread has really exited.

Also fix the thread attributes of the main thread so that we don't
unmap the main thread's stack, which is really owned by the dynamic
linker and contains things like environment variables. This fixes
crashes when joining with an exited main thread, and also fixes
publicly reported problems with accessing environment variables after
the main thread exits (for which I've added a new unit test).

In passing I also fixed a bug where if the clone(2) inside
pthread_create(3) fails, we'd unmap the child's stack and TLS (which
contains the mutex) and then try to unlock the mutex. Boom! It wasn't
until after I'd uploaded the fix for this that I came across a new
public bug reporting this exact failure.

Bug: 8206355
Bug: 11693195
Bug: https://code.google.com/p/android/issues/detail?id=57421
Bug: https://code.google.com/p/android/issues/detail?id=62392
Change-Id: I2af9cf6e8ae510a67256ad93cad891794ed0580b
This commit is contained in:
Elliott Hughes 2013-11-15 17:40:18 -08:00
parent f246c589d6
commit 877ec6d904
22 changed files with 263 additions and 81 deletions

View File

@ -276,6 +276,7 @@ int personality(unsigned long) all
long perf_event_open(struct perf_event_attr* attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags) all
pid_t __clone:clone(int, void*, int*, void*, int*) all
int __set_tid_address:set_tid_address(int*) all
int epoll_create1(int) all
int epoll_ctl(int, int op, int, struct epoll_event*) all

View File

@ -19,6 +19,7 @@ syscall_src += arch-aarch64/syscalls/__rt_sigprocmask.S
syscall_src += arch-aarch64/syscalls/__rt_sigsuspend.S
syscall_src += arch-aarch64/syscalls/__rt_sigtimedwait.S
syscall_src += arch-aarch64/syscalls/__sched_getaffinity.S
syscall_src += arch-aarch64/syscalls/__set_tid_address.S
syscall_src += arch-aarch64/syscalls/__syslog.S
syscall_src += arch-aarch64/syscalls/__timer_create.S
syscall_src += arch-aarch64/syscalls/__timer_delete.S

View File

@ -0,0 +1,22 @@
/* Generated by gensyscalls.py. Do not edit. */
#include <private/bionic_asm.h>
/*
 * __set_tid_address (aarch64): raw system call stub for set_tid_address.
 * x0 is never written before the svc, so the caller's argument reaches the
 * kernel unchanged. On success the kernel's result is returned in x0; on
 * failure control is transferred to __set_errno with the negated result.
 */
ENTRY(__set_tid_address)
/* Push the frame pointer and link register, then establish a new frame. */
stp x29, x30, [sp, #-16]!
mov x29, sp
/* Preserve x8: it is overwritten with the syscall number just below. */
str x8, [sp, #-16]!
mov x8, __NR_set_tid_address
svc #0
/* Undo the two pushes in reverse order. */
ldr x8, [sp], #16
ldp x29, x30, [sp], #16
/* Unsigned-higher than -(MAX_ERRNO + 1) means x0 lies in [-MAX_ERRNO, -1], i.e. an error. */
cmn x0, #(MAX_ERRNO + 1)
/* On error, flip the sign of x0 and branch (without link) to __set_errno, so x30 still points at our caller. */
cneg x0, x0, hi
b.hi __set_errno
ret
END(__set_tid_address)
/* Give the symbol hidden visibility. */
.hidden _C_LABEL(__set_tid_address)

View File

@ -23,6 +23,7 @@ syscall_src += arch-arm/syscalls/__rt_sigprocmask.S
syscall_src += arch-arm/syscalls/__rt_sigsuspend.S
syscall_src += arch-arm/syscalls/__rt_sigtimedwait.S
syscall_src += arch-arm/syscalls/__sched_getaffinity.S
syscall_src += arch-arm/syscalls/__set_tid_address.S
syscall_src += arch-arm/syscalls/__set_tls.S
syscall_src += arch-arm/syscalls/__sigaction.S
syscall_src += arch-arm/syscalls/__statfs64.S

View File

@ -0,0 +1,14 @@
/* Generated by gensyscalls.py. Do not edit. */
#include <private/bionic_asm.h>
/*
 * __set_tid_address (arm): raw system call stub for set_tid_address.
 * r0 is not modified before the swi, so the caller's argument is passed
 * through. Returns the kernel's result in r0 on success; on failure it
 * branches to __set_errno with the negated result in r0.
 */
ENTRY(__set_tid_address)
/* Stash r7 in ip while r7 carries the syscall number, and restore it after the trap. */
mov ip, r7
ldr r7, =__NR_set_tid_address
swi #0
mov r7, ip
/* Unsigned lower-or-same than -(MAX_ERRNO + 1) means success: return r0 as-is. */
cmn r0, #(MAX_ERRNO + 1)
bxls lr
/* Otherwise r0 is in [-MAX_ERRNO, -1]; negate it and branch (without link) to __set_errno. */
neg r0, r0
b __set_errno
END(__set_tid_address)

View File

@ -24,6 +24,7 @@ syscall_src += arch-mips/syscalls/__rt_sigsuspend.S
syscall_src += arch-mips/syscalls/__rt_sigtimedwait.S
syscall_src += arch-mips/syscalls/__sched_getaffinity.S
syscall_src += arch-mips/syscalls/__set_thread_area.S
syscall_src += arch-mips/syscalls/__set_tid_address.S
syscall_src += arch-mips/syscalls/__sigaction.S
syscall_src += arch-mips/syscalls/__statfs64.S
syscall_src += arch-mips/syscalls/__syslog.S

View File

@ -0,0 +1,23 @@
/* Generated by gensyscalls.py. Do not edit. */
#include <asm/unistd.h>
/*
 * __set_tid_address (mips): raw system call stub for set_tid_address.
 * $a0 is not modified before the syscall instruction, so the caller's
 * argument is passed through. On success the result is returned in $v0; on
 * failure the stub jumps to __set_errno with the kernel's value in $a0.
 */
.text
.globl __set_tid_address
.align 4
.ent __set_tid_address
__set_tid_address:
/* noreorder: the instruction written after each branch/jump below is its delay slot. */
.set noreorder
/* PIC prologue deriving $gp from $t9 (assumes $t9 holds this function's address on entry). */
.cpload $t9
li $v0, __NR_set_tid_address
syscall
/* A non-zero $a3 after the syscall selects the error path. The move sits in the
 * branch delay slot, so $a0 = $v0 is executed on both paths. */
bnez $a3, 1f
move $a0, $v0
/* Success: return to the caller with the result still in $v0. */
j $ra
nop
1:
/* Failure: jump (without link) to __set_errno, which then returns to our caller via $ra. */
la $t9,__set_errno
j $t9
nop
.set reorder
.end __set_tid_address

View File

@ -24,6 +24,7 @@ syscall_src += arch-x86/syscalls/__rt_sigsuspend.S
syscall_src += arch-x86/syscalls/__rt_sigtimedwait.S
syscall_src += arch-x86/syscalls/__sched_getaffinity.S
syscall_src += arch-x86/syscalls/__set_thread_area.S
syscall_src += arch-x86/syscalls/__set_tid_address.S
syscall_src += arch-x86/syscalls/__sigaction.S
syscall_src += arch-x86/syscalls/__statfs64.S
syscall_src += arch-x86/syscalls/__syslog.S

View File

@ -0,0 +1,20 @@
/* Generated by gensyscalls.py. Do not edit. */
#include <private/bionic_asm.h>
/*
 * __set_tid_address (x86): raw system call stub for set_tid_address using
 * int $0x80. Returns the kernel's result in %eax on success; on failure it
 * calls __set_errno with the positive error code and returns -1.
 */
ENTRY(__set_tid_address)
/* %ebx is used to carry the syscall argument, so save the caller's value first. */
pushl %ebx
/* After the push, 0(%esp) is the saved %ebx and 4(%esp) the return address
 * (assuming ENTRY emits no stack adjustment), so 8(%esp) is the first argument. */
mov 8(%esp), %ebx
movl $__NR_set_tid_address, %eax
int $0x80
/* Unsigned-below -MAX_ERRNO means success: skip the errno handling. */
cmpl $-MAX_ERRNO, %eax
jb 1f
/* Error: make the code positive, pass it to __set_errno on the stack, pop it, and set %eax to -1. */
negl %eax
pushl %eax
call __set_errno
addl $4, %esp
orl $-1, %eax
1:
/* Common exit: restore the caller's %ebx. */
popl %ebx
ret
END(__set_tid_address)

View File

@ -20,6 +20,7 @@ syscall_src += arch-x86_64/syscalls/__rt_sigprocmask.S
syscall_src += arch-x86_64/syscalls/__rt_sigsuspend.S
syscall_src += arch-x86_64/syscalls/__rt_sigtimedwait.S
syscall_src += arch-x86_64/syscalls/__sched_getaffinity.S
syscall_src += arch-x86_64/syscalls/__set_tid_address.S
syscall_src += arch-x86_64/syscalls/__syslog.S
syscall_src += arch-x86_64/syscalls/__timer_create.S
syscall_src += arch-x86_64/syscalls/__timer_delete.S

View File

@ -0,0 +1,17 @@
/* Generated by gensyscalls.py. Do not edit. */
#include <private/bionic_asm.h>
/*
 * __set_tid_address (x86_64): raw system call stub for set_tid_address.
 * %rdi is not modified before the syscall instruction, so the caller's
 * argument is passed through. Returns the kernel's result in %rax on
 * success; on failure it calls __set_errno and returns -1.
 */
ENTRY(__set_tid_address)
movl $__NR_set_tid_address, %eax
syscall
/* Unsigned-below -MAX_ERRNO means success: return %rax untouched. */
cmpq $-MAX_ERRNO, %rax
jb 1f
/* Error: negate to a positive code, pass it as the first argument (%edi) to __set_errno, then set %rax to -1. */
negl %eax
movl %eax, %edi
call __set_errno
orq $-1, %rax
1:
ret
END(__set_tid_address)
/* Give the symbol hidden visibility. */
.hidden _C_LABEL(__set_tid_address)

View File

@ -41,7 +41,12 @@ int fork() {
__timer_table_start_stop(1);
__bionic_atfork_run_prepare();
int result = __clone(SIGCHLD, NULL, NULL, NULL, NULL);
pthread_internal_t* self = __get_thread();
#if defined(__x86_64__)
int result = __clone(CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD, NULL, NULL, &(self->tid), NULL);
#else
int result = __clone(CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD, NULL, NULL, NULL, &(self->tid));
#endif
if (result != 0) { // Not a child process.
__timer_table_start_stop(0);
__bionic_atfork_run_parent();

View File

@ -50,6 +50,7 @@ extern "C" abort_msg_t** __abort_message_ptr;
extern "C" uintptr_t __get_sp(void);
extern "C" int __system_properties_init(void);
extern "C" int __set_tls(void* ptr);
extern "C" int __set_tid_address(int* tid_address);
// Not public, but well-known in the BSDs.
const char* __progname;
@ -90,17 +91,24 @@ void __libc_init_tls(KernelArgumentBlock& args) {
uintptr_t stack_bottom = stack_top - stack_size;
static void* tls[BIONIC_TLS_SLOTS];
static pthread_internal_t thread;
thread.tid = gettid();
thread.tls = tls;
pthread_attr_init(&thread.attr);
pthread_attr_setstack(&thread.attr, (void*) stack_bottom, stack_size);
_init_thread(&thread, false);
__init_tls(&thread);
__set_tls(thread.tls);
static pthread_internal_t main_thread;
main_thread.tls = tls;
// Tell the kernel to clear our tid field when we exit, so we're like any other pthread.
main_thread.tid = __set_tid_address(&main_thread.tid);
// We already have a stack, and we don't want to free it up on exit (because things like
// environment variables with global scope live on it).
pthread_attr_init(&main_thread.attr);
pthread_attr_setstack(&main_thread.attr, (void*) stack_bottom, stack_size);
main_thread.attr.flags = PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK;
_init_thread(&main_thread, false);
__init_tls(&main_thread);
__set_tls(main_thread.tls);
tls[TLS_SLOT_BIONIC_PREINIT] = &args;
__init_alternate_signal_stack(&thread);
__init_alternate_signal_stack(&main_thread);
}
void __libc_init_common(KernelArgumentBlock& args) {

View File

@ -97,7 +97,6 @@ int _init_thread(pthread_internal_t* thread, bool add_to_thread_list) {
}
}
pthread_cond_init(&thread->join_cond, NULL);
thread->cleanup_stack = NULL;
if (add_to_thread_list) {
@ -215,17 +214,22 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
// the new thread.
pthread_mutex_t* start_mutex = (pthread_mutex_t*) &thread->tls[TLS_SLOT_START_MUTEX];
pthread_mutex_init(start_mutex, NULL);
ScopedPthreadMutexLocker start_locker(start_mutex);
pthread_mutex_lock(start_mutex);
thread->tls[TLS_SLOT_THREAD_ID] = thread;
thread->start_routine = start_routine;
thread->start_routine_arg = arg;
int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS;
int tid = __bionic_clone(flags, child_stack, NULL, thread->tls, NULL, __pthread_start, thread);
if (tid < 0) {
int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
int rc = __bionic_clone(flags, child_stack, &(thread->tid), thread->tls, &(thread->tid), __pthread_start, thread);
if (rc == -1) {
int clone_errno = errno;
// We don't have to unlock the mutex at all because clone(2) failed so there's no child waiting to
// be unblocked, but we're about to unmap the memory the mutex is stored in, so this serves as a
// reminder that you can't rewrite this function to use a ScopedPthreadMutexLocker.
pthread_mutex_unlock(start_mutex);
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) == 0) {
munmap(thread->attr.stack_base, thread->attr.stack_size);
}
@ -234,12 +238,10 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
return clone_errno;
}
thread->tid = tid;
int init_errno = _init_thread(thread, true);
if (init_errno != 0) {
// Mark the thread detached and let its __pthread_start run to
// completion. (It'll just exit immediately, cleaning up its resources.)
// Mark the thread detached and let its __pthread_start run to completion.
// It'll check this flag and exit immediately, cleaning up its resources.
thread->internal_flags |= PTHREAD_INTERNAL_FLAG_THREAD_INIT_FAILED;
thread->attr.flags |= PTHREAD_ATTR_FLAG_DETACHED;
return init_errno;
@ -251,8 +253,9 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
_thread_created_hook(thread->tid);
}
// Publish the pthread_t and let the thread run.
*thread_out = (pthread_t) thread;
// Publish the pthread_t and unlock the mutex to let the new thread start running.
*thread_out = reinterpret_cast<pthread_t>(thread);
pthread_mutex_unlock(start_mutex);
return 0;
}

View File

@ -57,8 +57,9 @@ void __pthread_cleanup_pop(__pthread_cleanup_t* c, int execute) {
}
}
void pthread_exit(void* retval) {
void pthread_exit(void* return_value) {
pthread_internal_t* thread = __get_thread();
thread->return_value = return_value;
// Call the cleanup handlers first.
while (thread->cleanup_stack) {
@ -90,10 +91,9 @@ void pthread_exit(void* retval) {
size_t stack_size = thread->attr.stack_size;
bool user_allocated_stack = ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) != 0);
// If the thread is detached, destroy the pthread_internal_t,
// otherwise keep it in memory and signal any joiners.
pthread_mutex_lock(&gThreadListLock);
if (thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) {
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) {
// The thread is detached, so we can destroy the pthread_internal_t.
_pthread_internal_remove_locked(thread);
} else {
// Make sure that the pthread_internal_t doesn't have stale pointers to a stack that
@ -103,15 +103,8 @@ void pthread_exit(void* retval) {
thread->attr.stack_size = 0;
thread->tls = NULL;
}
// Indicate that the thread has exited for joining threads.
thread->attr.flags |= PTHREAD_ATTR_FLAG_ZOMBIE;
thread->return_value = retval;
// Signal the joining thread if present.
if (thread->attr.flags & PTHREAD_ATTR_FLAG_JOINED) {
pthread_cond_signal(&thread->join_cond);
}
// pthread_join is responsible for destroying the pthread_internal_t for non-detached threads.
// The kernel will futex_wake on the pthread_internal_t::tid field to wake pthread_join.
}
pthread_mutex_unlock(&gThreadListLock);
@ -131,6 +124,6 @@ void pthread_exit(void* retval) {
_exit_with_stack_teardown(stack_base, stack_size, 0);
}
/* NOTREACHED, but we told the compiler this function is noreturn, and it doesn't believe us. */
// NOTREACHED, but we told the compiler this function is noreturn, and it doesn't believe us.
abort();
}

View File

@ -31,28 +31,31 @@
#include <pthread.h>
struct pthread_internal_t {
struct pthread_internal_t* next;
struct pthread_internal_t* prev;
pthread_attr_t attr;
pid_t tid;
bool allocated_on_heap;
pthread_cond_t join_cond;
void* return_value;
int internal_flags;
__pthread_cleanup_t* cleanup_stack;
void** tls; /* thread-local storage area */
struct pthread_internal_t* next;
struct pthread_internal_t* prev;
void* (*start_routine)(void*);
void* start_routine_arg;
pid_t tid;
void* alternate_signal_stack;
void** tls;
/*
* The dynamic linker implements dlerror(3), which makes it hard for us to implement this
* per-thread buffer by simply using malloc(3) and free(3).
*/
pthread_attr_t attr;
bool allocated_on_heap; /* TODO: move this into attr.flags? */
int internal_flags; /* TODO: move this into attr.flags? */
__pthread_cleanup_t* cleanup_stack;
void* (*start_routine)(void*);
void* start_routine_arg;
void* return_value;
void* alternate_signal_stack;
/*
* The dynamic linker implements dlerror(3), which makes it hard for us to implement this
* per-thread buffer by simply using malloc(3) and free(3).
*/
#define __BIONIC_DLERROR_BUFFER_SIZE 512
char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
};
__LIBC_HIDDEN__ int _init_thread(pthread_internal_t* thread, bool add_to_thread_list);
@ -73,9 +76,6 @@ __LIBC_HIDDEN__ void _pthread_internal_remove_locked(pthread_internal_t* thread)
/* Has the thread been joined by another thread? */
#define PTHREAD_ATTR_FLAG_JOINED 0x00000004
/* Has the thread already exited but not been joined? */
#define PTHREAD_ATTR_FLAG_ZOMBIE 0x00000008
#define PTHREAD_INTERNAL_FLAG_THREAD_INIT_FAILED 1
/*

View File

@ -28,33 +28,50 @@
#include <errno.h>
#include "private/bionic_futex.h"
#include "pthread_accessor.h"
int pthread_join(pthread_t t, void** ret_val) {
int pthread_join(pthread_t t, void** return_value) {
if (t == pthread_self()) {
return EDEADLK;
}
pthread_accessor thread(t);
if (thread.get() == NULL) {
pid_t tid;
volatile int* tid_ptr;
{
pthread_accessor thread(t);
if (thread.get() == NULL) {
return ESRCH;
}
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) {
return EINVAL;
}
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_JOINED) != 0) {
return EINVAL;
}
// Okay, looks like we can signal our intention to join.
thread->attr.flags |= PTHREAD_ATTR_FLAG_JOINED;
tid = thread->tid;
tid_ptr = &thread->tid;
}
if (thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) {
return EINVAL;
// We set the PTHREAD_ATTR_FLAG_JOINED flag with the lock held,
// so no one is going to remove this thread except us.
// Wait for the thread to actually exit, if it hasn't already.
while (*tid_ptr != 0) {
__futex_wait(tid_ptr, tid, NULL);
}
if (thread->attr.flags & PTHREAD_ATTR_FLAG_JOINED) {
return EINVAL;
}
// Take the lock again so we can pull the thread's return value
// and remove the thread from the list.
pthread_accessor thread(t);
// Signal our intention to join, and wait for the thread to exit.
thread->attr.flags |= PTHREAD_ATTR_FLAG_JOINED;
while ((thread->attr.flags & PTHREAD_ATTR_FLAG_ZOMBIE) == 0) {
pthread_cond_wait(&thread->join_cond, &gThreadListLock);
}
if (ret_val) {
*ret_val = thread->return_value;
if (return_value) {
*return_value = thread->return_value;
}
_pthread_internal_remove_locked(thread.get());

View File

@ -218,7 +218,7 @@ int pthread_key_delete(pthread_key_t key) {
// startup trampoline (__pthread_start) hasn't been run yet by the
// scheduler. t->tls will also be NULL after a thread's stack has been
// unmapped but before the ongoing pthread_join() is finished.
if ((t->attr.flags & PTHREAD_ATTR_FLAG_ZOMBIE) || t->tls == NULL) {
if (t->tid == 0 || t->tls == NULL) {
continue;
}

View File

@ -28,6 +28,7 @@
#ifndef _BIONIC_FUTEX_H
#define _BIONIC_FUTEX_H
#include <linux/compiler.h> /* needed for __user in non-uapi futex.h */
#include <linux/futex.h>
#include <sys/cdefs.h>

View File

@ -51,7 +51,9 @@ enum {
TLS_SLOT_THREAD_ID,
TLS_SLOT_ERRNO,
/* This slot is used when starting a new thread, before any code that needs errno runs. */
/* This slot in the child's TLS is used to synchronize the parent and child
* during thread initialization. The child finishes with this mutex before
* running any code that can set errno, so we can reuse the errno slot. */
TLS_SLOT_START_MUTEX = TLS_SLOT_ERRNO,
/* These two aren't used by bionic itself, but allow the graphics code to

View File

@ -150,22 +150,48 @@ TEST(pthread, pthread_join_self) {
ASSERT_EQ(EDEADLK, pthread_join(pthread_self(), &result));
}
#if __BIONIC__ // For some reason, gtest on bionic can cope with this but gtest on glibc can't.
struct TestBug37410 {
pthread_t main_thread;
pthread_mutex_t mutex;
static void TestBug37410() {
pthread_t t1;
ASSERT_EQ(0, pthread_create(&t1, NULL, JoinFn, reinterpret_cast<void*>(pthread_self())));
pthread_exit(NULL);
}
static void main() {
TestBug37410 data;
data.main_thread = pthread_self();
ASSERT_EQ(0, pthread_mutex_init(&data.mutex, NULL));
ASSERT_EQ(0, pthread_mutex_lock(&data.mutex));
pthread_t t;
ASSERT_EQ(0, pthread_create(&t, NULL, TestBug37410::thread_fn, reinterpret_cast<void*>(&data)));
// Wait for the thread to be running...
ASSERT_EQ(0, pthread_mutex_lock(&data.mutex));
ASSERT_EQ(0, pthread_mutex_unlock(&data.mutex));
// ...and exit.
pthread_exit(NULL);
}
private:
static void* thread_fn(void* arg) {
TestBug37410* data = reinterpret_cast<TestBug37410*>(arg);
// Let the main thread know we're running.
pthread_mutex_unlock(&data->mutex);
// And wait for the main thread to exit.
pthread_join(data->main_thread, NULL);
return NULL;
}
};
// Even though this isn't really a death test, we have to say "DeathTest" here so gtest knows to
// run this test (which exits normally) in its own process.
TEST(pthread_DeathTest, pthread_bug_37410) {
// http://code.google.com/p/android/issues/detail?id=37410
::testing::FLAGS_gtest_death_test_style = "threadsafe";
ASSERT_EXIT(TestBug37410(), ::testing::ExitedWithCode(0), "");
ASSERT_EXIT(TestBug37410::main(), ::testing::ExitedWithCode(0), "");
}
#endif
static void* SignalHandlerFn(void* arg) {
sigset_t wait_set;

View File

@ -19,6 +19,7 @@
#include <errno.h>
#include <libgen.h>
#include <limits.h>
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
@ -132,3 +133,27 @@ TEST(stdlib, qsort) {
ASSERT_STREQ("bravo", entries[1].name);
ASSERT_STREQ("charlie", entries[2].name);
}
// Thread entry point for the getenv-after-main-exits test.
// `arg` carries the pthread_t of the thread to join, smuggled through the void* argument.
// After the join attempt, ENVIRONMENT_VARIABLE is set to "value" if it is not already
// present in the environment. Always returns NULL.
static void* TestBug57421_child(void* arg) {
  // Wait for the thread whose handle we were given; the result is deliberately ignored.
  pthread_join(reinterpret_cast<pthread_t>(arg), NULL);

  // Touch the environment for both reading and (if unset) writing.
  if (getenv("ENVIRONMENT_VARIABLE") == NULL) {
    setenv("ENVIRONMENT_VARIABLE", "value", 1);
  }
  return NULL;
}
// Body of the getenv-after-main-exits test, run in its own process by ASSERT_EXIT.
// Starts a thread running TestBug57421_child, handing it the calling thread's pthread_t,
// and then terminates the calling thread with pthread_exit.
static void TestBug57421_main() {
  pthread_t child;
  void* self_as_arg = reinterpret_cast<void*>(pthread_self());
  ASSERT_EQ(0, pthread_create(&child, NULL, TestBug57421_child, self_as_arg));
  pthread_exit(NULL);
}
// Even though this isn't really a death test, we have to say "DeathTest" here so gtest knows to
// run this test (which exits normally) in its own process.
// TestBug57421_main calls pthread_exit on the thread that runs it while a child thread joins
// that thread and then reads/writes the environment; the process is expected to exit with code 0.
TEST(stdlib_DeathTest, getenv_after_main_thread_exits) {
// https://code.google.com/p/android/issues/detail?id=57421
// Select gtest's "threadsafe" death-test style before the ASSERT_EXIT below runs.
::testing::FLAGS_gtest_death_test_style = "threadsafe";
// Passes only if the statement's process exits with code 0; the empty pattern places no
// constraint on what is written to stderr.
ASSERT_EXIT(TestBug57421_main(), ::testing::ExitedWithCode(0), "");
}