diff --git a/libc/arch-arm/bionic/_exit_with_stack_teardown.S b/libc/arch-arm/bionic/_exit_with_stack_teardown.S index c430edbe4..0d97f0613 100644 --- a/libc/arch-arm/bionic/_exit_with_stack_teardown.S +++ b/libc/arch-arm/bionic/_exit_with_stack_teardown.S @@ -33,12 +33,11 @@ ENTRY(_exit_with_stack_teardown) mov lr, r2 ldr r7, =__NR_munmap - swi #0 // the stack is destroyed by this call + swi #0 + // If munmap failed, we ignore the failure and exit anyway. + mov r0, lr ldr r7, =__NR_exit swi #0 - - // exit() should never return, cause a crash if it does - mov r0, #0 - ldr r0, [r0] + // The exit syscall does not return. END(_exit_with_stack_teardown) diff --git a/libc/arch-arm/bionic/clone.S b/libc/arch-arm/bionic/clone.S index 3556b8ed4..0782abefb 100644 --- a/libc/arch-arm/bionic/clone.S +++ b/libc/arch-arm/bionic/clone.S @@ -28,53 +28,7 @@ #include -// int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, void* arg); -ENTRY(__pthread_clone) - # Push 'fn' and 'arg' onto 'child_stack'. - stmdb r1!, {r0, r3} - - # The sys_clone system call only takes two arguments: 'flags' and 'child_stack'. - # 'child_stack' is already in r1, but we need to move 'flags' into position. - mov r0, r2 - - # System call. - mov ip, r7 - ldr r7, =__NR_clone - swi #0 - - # Child? - movs r0, r0 - beq 1f - - # Parent. - mov r7, ip - cmn r0, #(MAX_ERRNO + 1) - bxls lr - neg r0, r0 - b __set_errno - -1: # Child. - # Pop 'fn' and 'arg' back off the stack and call __thread_entry. - pop {r0, r1} - # __thread_entry also needs our stack pointer. - mov r2, sp - b __thread_entry -END(__pthread_clone) - - - # - # This function is defined as: - # - # pid_t __bionic_clone( int flags, void *child_stack, - # pid_t *pid, void *tls, pid_t *ctid, - # int (*fn)(void *), void* arg ); - # - # NOTE: This is not the same signature as the glibc - # __clone function. Placing 'fn' and 'arg' - # at the end of the parameter list makes the - # implementation much simpler. - # - +// pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg); ENTRY(__bionic_clone) mov ip, sp .save {r4, r5, r6, r7} diff --git a/libc/arch-mips/bionic/_exit_with_stack_teardown.S b/libc/arch-mips/bionic/_exit_with_stack_teardown.S index 8351e2ead..9cab52b27 100644 --- a/libc/arch-mips/bionic/_exit_with_stack_teardown.S +++ b/libc/arch-mips/bionic/_exit_with_stack_teardown.S @@ -30,7 +30,7 @@ .text -// void _exit_with_stack_teardown(void * stackBase, size_t stackSize, int status) +// void _exit_with_stack_teardown(void* stackBase, size_t stackSize, int status) .type _exit_with_stack_teardown, @function .global _exit_with_stack_teardown @@ -40,12 +40,11 @@ _exit_with_stack_teardown: move $s0,$a2 /* preserve status for exit() call */ li $v0,__NR_munmap - syscall /* the stack is destroyed by this call */ + syscall + // If munmap failed, we ignore the failure and exit anyway. + move $a0,$s0 li $v0,__NR_exit syscall - - /* exit() should never return, cause a crash if it does */ - move $a0,$0 - lw $a0,($a0) + // The exit syscall does not return. .end _exit_with_stack_teardown diff --git a/libc/arch-mips/bionic/clone.S b/libc/arch-mips/bionic/clone.S index 94e18a254..8970b6ec7 100644 --- a/libc/arch-mips/bionic/clone.S +++ b/libc/arch-mips/bionic/clone.S @@ -30,78 +30,7 @@ #include #include - .text - .type __pthread_clone, @function - .global __pthread_clone - .align 4 - .ent __pthread_clone - -/* - * int __pthread_clone(void* (*fn)(void*), void *child_stack, - * int flags, void *arg); - */ - -__pthread_clone: - .set noreorder - .cpload $t9 - .set reorder - - # set up child stack - subu $a1,16 - sw $a0,0($a1) # fn - sw $a3,4($a1) # arg -# sw $a1+16,8($a1) # tls - - /* - * int sys_clone(int flags, void *child_stack, int *parent_tidptr, - * struct user_desc *newtls, int *child_tidptr); - */ - - move $a0,$a2 # flags -# move $a1,$a1 # child_stack - move $a2,$0 # parent_tidptr - move $a3,$0 # user_desc - and $a0,~(CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID) - # make sure the kernel doesn't access child_tidptr - - li $v0,__NR_clone - syscall - - bnez $a3,.L__error - - beqz $v0,.L__thread_start - - j $ra - -.L__thread_start: - lw $a0,0($sp) # fn - lw $a1,4($sp) # arg - addu $a2,$sp,16 # tls - - # void __thread_entry(void* (*func)(void*), void *arg, void *tls) - la $t9, __thread_entry - j $t9 - -.L__error: - move $a0,$v0 - la $t9,__set_errno - j $t9 - - .end __pthread_clone - - - # - # This function is defined as: - # - # pid_t __bionic_clone( int flags, void *child_stack, - # pid_t *pid, void *tls, pid_t *ctid, - # int (*fn)(void *), void* arg ); - # - # NOTE: This is not the same signature than the GLibc - # __clone function here !! Placing 'fn' and 'arg' - # at the end of the parameter list makes the - # implementation much simpler. - # +// pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg); .text .type __bionic_clone, @function .global __bionic_clone diff --git a/libc/arch-x86/bionic/_exit_with_stack_teardown.S b/libc/arch-x86/bionic/_exit_with_stack_teardown.S index 1c6d48adc..9128f10d8 100644 --- a/libc/arch-x86/bionic/_exit_with_stack_teardown.S +++ b/libc/arch-x86/bionic/_exit_with_stack_teardown.S @@ -6,17 +6,15 @@ ENTRY(_exit_with_stack_teardown) // We can trash %ebx here since this call should never return. // We can also take advantage of the fact that the linux syscall trap // handler saves all the registers, so we don't need a stack to keep - // the status argument for exit while doing the munmap */ + // the status argument for exit while doing the munmap. mov 4(%esp), %ebx // stackBase mov 8(%esp), %ecx // stackSize mov $__NR_munmap, %eax int $0x80 - // If munmap failed, we ignore the failure and exit anyway. mov %edx, %ebx // status movl $__NR_exit, %eax int $0x80 - // The exit syscall does not return. END(_exit_with_stack_teardown) diff --git a/libc/arch-x86/bionic/clone.S b/libc/arch-x86/bionic/clone.S index 457cb4a8f..eb9f54524 100644 --- a/libc/arch-x86/bionic/clone.S +++ b/libc/arch-x86/bionic/clone.S @@ -1,68 +1,7 @@ #include #include -// int __pthread_clone(void* (*fn)(void*), void* tls, int flags, void* arg); -ENTRY(__pthread_clone) - pushl %ebx - pushl %ecx - movl 16(%esp), %ecx - - # save tls - movl %ecx, %ebx - # 16-byte alignment on child stack - andl $~15, %ecx - - # insert arguments onto the child stack - movl 12(%esp), %eax - movl %eax, -16(%ecx) - movl 24(%esp), %eax - movl %eax, -12(%ecx) - movl %ebx, -8(%ecx) - - subl $16, %ecx - movl 20(%esp), %ebx - - # make system call - movl $__NR_clone, %eax - int $0x80 - - cmpl $0, %eax - je pc_child - jg pc_parent - - # an error occurred, set errno and return -1 - negl %eax - pushl %eax - call __set_errno - addl $4, %esp - orl $-1, %eax - jmp pc_return - -pc_child: - # we're in the child thread now, call __thread_entry - # with the appropriate arguments on the child stack - # we already placed most of them - call __thread_entry - hlt - -pc_parent: - # we're the parent; nothing to do. -pc_return: - popl %ecx - popl %ebx - ret -END(__pthread_clone) - - -/* - * int __bionic_clone(unsigned long clone_flags, - * void* newsp, - * int *parent_tidptr, - * void *new_tls, - * int *child_tidptr, - * int (*fn)(void *), - * void *arg); - */ +// pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg); ENTRY(__bionic_clone) pushl %ebx pushl %esi diff --git a/libc/arch-x86_64/bionic/_exit_with_stack_teardown.S b/libc/arch-x86_64/bionic/_exit_with_stack_teardown.S index a09babe2e..eca3b68f9 100644 --- a/libc/arch-x86_64/bionic/_exit_with_stack_teardown.S +++ b/libc/arch-x86_64/bionic/_exit_with_stack_teardown.S @@ -36,12 +36,10 @@ ENTRY(_exit_with_stack_teardown) // the status argument for exit(2) while doing the munmap(2). mov $__NR_munmap, %eax syscall - - // If munmap failed, ignore the failure and exit anyway. + // If munmap failed, we ignore the failure and exit anyway. mov %rdx, %rdi // status mov $__NR_exit, %eax syscall - // The exit syscall does not return. END(_exit_with_stack_teardown) diff --git a/libc/arch-x86_64/bionic/clone.S b/libc/arch-x86_64/bionic/clone.S index 7511e8673..b37416b44 100644 --- a/libc/arch-x86_64/bionic/clone.S +++ b/libc/arch-x86_64/bionic/clone.S @@ -29,53 +29,7 @@ #include #include -// int __pthread_clone(void* (*fn)(void*), void* tls, int flags, void* arg); -ENTRY(__pthread_clone) - # Save tls. - movq %rsi, %r11 - # Enforce 16-byte alignment for child stack. - andq $~15, %rsi - - # Copy 'fn', 'arg', and 'tls' onto the child stack. - movq %rdi, -32(%rsi) # fn - movq %rcx, -24(%rsi) # arg - movq %r11, -16(%rsi) # tls - subq $32, %rsi - - movq %rdx, %rdi - movl $__NR_clone, %eax - syscall - testl %eax, %eax - jns 1f - - # An error occurred, set errno and return -1. - negl %eax - movl %eax, %edi - call __set_errno - orl $-1, %eax - jmp 2f -1: - jnz 2f - - # We're in the child now, so call __thread_entry - # with the arguments from the child stack moved into - # the appropriate registers. We avoid pop here to keep - # the required 16-byte stack alignment. - movq (%rsp), %rdi # fn - movq 8(%rsp), %rsi # arg - movq 16(%rsp), %rdx # tls - call __thread_entry - hlt -2: - ret - -// int __bionic_clone(unsigned long clone_flags, -// void* new_sp, -// int* parent_tid_ptr, -// void* new_tls, -// int* child_tid_ptr, -// int (*fn)(void*), -// void* arg); +// pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg); ENTRY(__bionic_clone) # Enforce 16-byte alignment for child stack. andq $~15, %rsi diff --git a/libc/bionic/__thread_entry.cpp b/libc/bionic/__thread_entry.cpp index 8300a647d..3505f8b5b 100644 --- a/libc/bionic/__thread_entry.cpp +++ b/libc/bionic/__thread_entry.cpp @@ -32,26 +32,28 @@ #include "private/bionic_tls.h" -// This trampoline is called from the assembly _pthread_clone function. -// Our 'tls' and __pthread_clone's 'child_stack' are one and the same, just growing in -// opposite directions. -extern "C" void __thread_entry(void* (*func)(void*), void* arg, void** tls) { +// This trampoline is called from the assembly __bionic_clone function. +int __thread_entry(void* arg) { + pthread_internal_t* thread = reinterpret_cast(arg); + // Wait for our creating thread to release us. This lets it have time to // notify gdb about this thread before we start doing anything. // This also provides the memory barrier needed to ensure that all memory // accesses previously made by the creating thread are visible to us. - pthread_mutex_t* start_mutex = (pthread_mutex_t*) &tls[TLS_SLOT_SELF]; + pthread_mutex_t* start_mutex = (pthread_mutex_t*) &thread->tls[TLS_SLOT_START_MUTEX]; pthread_mutex_lock(start_mutex); pthread_mutex_destroy(start_mutex); - pthread_internal_t* thread = (pthread_internal_t*) tls[TLS_SLOT_THREAD_ID]; - thread->tls = tls; __init_tls(thread); + __init_alternate_signal_stack(thread); + if ((thread->internal_flags & PTHREAD_INTERNAL_FLAG_THREAD_INIT_FAILED) != 0) { pthread_exit(NULL); } - void* result = func(arg); + void* result = thread->start_routine(thread->start_routine_arg); pthread_exit(result); + + return 0; } diff --git a/libc/bionic/bionic_clone.c b/libc/bionic/bionic_clone.c index 8a17e135e..518d99634 100644 --- a/libc/bionic/bionic_clone.c +++ b/libc/bionic/bionic_clone.c @@ -31,14 +31,7 @@ #include #include -extern int __bionic_clone(unsigned long clone_flags, - void* newsp, - int *parent_tidptr, - void *new_tls, - int *child_tidptr, - int (*fn)(void *), - void *arg); - +extern pid_t __bionic_clone(uint32_t flags, void* child_stack, int* parent_tid, void* tls, int* child_tid, int (*fn)(void*), void* arg); extern void __exit(int status); /* this function is called from the __bionic_clone diff --git a/libc/bionic/libc_init_common.cpp b/libc/bionic/libc_init_common.cpp index f88a26d26..3e092ae41 100644 --- a/libc/bionic/libc_init_common.cpp +++ b/libc/bionic/libc_init_common.cpp @@ -49,6 +49,7 @@ extern "C" abort_msg_t** __abort_message_ptr; extern "C" uintptr_t __get_sp(void); extern "C" int __system_properties_init(void); +extern "C" int __set_tls(void* ptr); // Not public, but well-known in the BSDs. const char* __progname; @@ -96,7 +97,10 @@ void __libc_init_tls(KernelArgumentBlock& args) { pthread_attr_setstack(&thread.attr, (void*) stack_bottom, stack_size); _init_thread(&thread, false); __init_tls(&thread); + __set_tls(thread.tls); tls[TLS_SLOT_BIONIC_PREINIT] = &args; + + __init_alternate_signal_stack(&thread); } void __libc_init_common(KernelArgumentBlock& args) { diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp index 21533108f..386e8d147 100644 --- a/libc/bionic/pthread_create.cpp +++ b/libc/bionic/pthread_create.cpp @@ -40,7 +40,7 @@ #include "private/ErrnoRestorer.h" #include "private/ScopedPthreadMutexLocker.h" -extern "C" int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, void* arg); +extern "C" pid_t __bionic_clone(uint32_t flags, void* child_stack, int* parent_tid, void* tls, int* child_tid, int (*fn)(void*), void* arg); #ifdef __i386__ #define ATTRIBUTES __attribute__((noinline)) __attribute__((fastcall)) @@ -50,15 +50,14 @@ extern "C" int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, extern "C" void ATTRIBUTES _thread_created_hook(pid_t thread_id); -extern "C" int __set_tls(void* ptr); - static pthread_mutex_t gPthreadStackCreationLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t gDebuggerNotificationLock = PTHREAD_MUTEX_INITIALIZER; +// This code is used both by each new pthread and the code that initializes the main thread. void __init_tls(pthread_internal_t* thread) { - // Zero-initialize all the slots. - for (size_t i = 0; i < BIONIC_TLS_SLOTS; ++i) { + // Zero-initialize all the slots after TLS_SLOT_SELF and TLS_SLOT_THREAD_ID. + for (size_t i = TLS_SLOT_ERRNO; i < BIONIC_TLS_SLOTS; ++i) { thread->tls[i] = NULL; } @@ -67,11 +66,10 @@ void __init_tls(pthread_internal_t* thread) { thread->tls[TLS_SLOT_THREAD_ID] = thread; // GCC looks in the TLS for the stack guard on x86, so copy it there from our global. thread->tls[TLS_SLOT_STACK_GUARD] = (void*) __stack_chk_guard; +} - __set_tls(thread->tls); - +void __init_alternate_signal_stack(pthread_internal_t* thread) { // Create and set an alternate signal stack. - // This must happen after __set_tls, in case a system call fails and tries to set errno. stack_t ss; ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (ss.ss_sp != MAP_FAILED) { @@ -181,24 +179,26 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr, // The child stack is the same address, just growing in the opposite direction. // At offsets >= 0, we have the TLS slots. // At offsets < 0, we have the child stack. - void** tls = (void**)((uint8_t*)(thread->attr.stack_base) + thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*)); - void* child_stack = tls; + thread->tls = (void**)((uint8_t*)(thread->attr.stack_base) + thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*)); + void* child_stack = thread->tls; - // Create a mutex for the thread in TLS_SLOT_SELF to wait on once it starts so we can keep + // Create a mutex for the thread in TLS to wait on once it starts so we can keep // it from doing anything until after we notify the debugger about it // // This also provides the memory barrier we need to ensure that all // memory accesses previously performed by this thread are visible to // the new thread. - pthread_mutex_t* start_mutex = (pthread_mutex_t*) &tls[TLS_SLOT_SELF]; + pthread_mutex_t* start_mutex = (pthread_mutex_t*) &thread->tls[TLS_SLOT_START_MUTEX]; pthread_mutex_init(start_mutex, NULL); ScopedPthreadMutexLocker start_locker(start_mutex); - tls[TLS_SLOT_THREAD_ID] = thread; + thread->tls[TLS_SLOT_THREAD_ID] = thread; - int flags = CLONE_FILES | CLONE_FS | CLONE_VM | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM; + thread->start_routine = start_routine; + thread->start_routine_arg = arg; - int tid = __pthread_clone(start_routine, child_stack, flags, arg); + int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS; + int tid = __bionic_clone(flags, child_stack, NULL, thread->tls, NULL, __thread_entry, thread); if (tid < 0) { int clone_errno = errno; if ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) == 0) { diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h index 8cca83aa9..52cfbcecf 100644 --- a/libc/bionic/pthread_internal.h +++ b/libc/bionic/pthread_internal.h @@ -42,6 +42,9 @@ struct pthread_internal_t { __pthread_cleanup_t* cleanup_stack; void** tls; /* thread-local storage area */ + void* (*start_routine)(void*); + void* start_routine_arg; + void* alternate_signal_stack; /* @@ -52,8 +55,12 @@ struct pthread_internal_t { char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE]; }; +extern "C" { + __LIBC_HIDDEN__ int __thread_entry(void* arg); // Called from assembler. +} __LIBC_HIDDEN__ int _init_thread(pthread_internal_t* thread, bool add_to_thread_list); __LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread); +__LIBC_HIDDEN__ void __init_alternate_signal_stack(pthread_internal_t*); __LIBC_HIDDEN__ void _pthread_internal_add(pthread_internal_t* thread); __LIBC_HIDDEN__ pthread_internal_t* __get_thread(void); diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h index a14bd3c38..094c71c06 100644 --- a/libc/private/bionic_tls.h +++ b/libc/private/bionic_tls.h @@ -51,6 +51,9 @@ enum { TLS_SLOT_THREAD_ID, TLS_SLOT_ERRNO, + /* This slot is used when starting a new thread, before any code that needs errno runs. */ + TLS_SLOT_START_MUTEX = TLS_SLOT_ERRNO, + /* These two aren't used by bionic itself, but allow the graphics code to * access TLS directly rather than using the pthread API. */ TLS_SLOT_OPENGL_API = 3, diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp index 42bd2b94f..f6f61b122 100644 --- a/tests/pthread_test.cpp +++ b/tests/pthread_test.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include TEST(pthread, pthread_key_create) { @@ -214,11 +215,12 @@ TEST(pthread, pthread_sigmask) { } #if __BIONIC__ -extern "C" int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, void* arg); -TEST(pthread, __pthread_clone) { +extern "C" pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg); +TEST(pthread, __bionic_clone) { + // Check that our hand-written clone assembler sets errno correctly on failure. uintptr_t fake_child_stack[16]; errno = 0; - ASSERT_EQ(-1, __pthread_clone(NULL, &fake_child_stack[0], CLONE_THREAD, NULL)); + ASSERT_EQ(-1, __bionic_clone(CLONE_THREAD, &fake_child_stack[0], NULL, NULL, NULL, NULL, NULL)); ASSERT_EQ(EINVAL, errno); } #endif @@ -373,6 +375,24 @@ TEST(pthread, pthread_join__multijoin) { ASSERT_EQ(0U, reinterpret_cast(join_result)); } +TEST(pthread, pthread_join__race) { + // http://b/11693195 --- pthread_join could return before the thread had actually exited. + // If the joiner unmapped the thread's stack, that could lead to SIGSEGV in the thread. + for (size_t i = 0; i < 1024; ++i) { + size_t stack_size = 64*1024; + void* stack = mmap(NULL, stack_size, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); + + pthread_attr_t a; + pthread_attr_init(&a); + pthread_attr_setstack(&a, stack, stack_size); + + pthread_t t; + ASSERT_EQ(0, pthread_create(&t, &a, IdFn, NULL)); + ASSERT_EQ(0, pthread_join(t, NULL)); + ASSERT_EQ(0, munmap(stack, stack_size)); + } +} + static void* GetActualGuardSizeFn(void* arg) { pthread_attr_t attributes; pthread_getattr_np(pthread_self(), &attributes);