am 645d4269: Merge "Switch pthread_create over to __bionic_clone."

* commit '645d4269d323acdcb20d1c31c15dbb4afbb9f46a':
  Switch pthread_create over to __bionic_clone.
This commit is contained in:
Elliott Hughes 2013-11-15 14:47:32 -08:00 committed by Android Git Automerger
commit 5b5248526b
15 changed files with 78 additions and 279 deletions

View File

@ -33,12 +33,11 @@
ENTRY(_exit_with_stack_teardown)
mov lr, r2
ldr r7, =__NR_munmap
swi #0 // the stack is destroyed by this call
swi #0
// If munmap failed, we ignore the failure and exit anyway.
mov r0, lr
ldr r7, =__NR_exit
swi #0
// exit() should never return, cause a crash if it does
mov r0, #0
ldr r0, [r0]
// The exit syscall does not return.
END(_exit_with_stack_teardown)

View File

@ -28,53 +28,7 @@
#include <private/bionic_asm.h>
// int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, void* arg);
ENTRY(__pthread_clone)
# Push 'fn' and 'arg' onto 'child_stack'.
stmdb r1!, {r0, r3}
# The sys_clone system call only takes two arguments: 'flags' and 'child_stack'.
# 'child_stack' is already in r1, but we need to move 'flags' into position.
mov r0, r2
# System call.
mov ip, r7
ldr r7, =__NR_clone
swi #0
# Child?
movs r0, r0
beq 1f
# Parent.
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
1: # Child.
# Pop 'fn' and 'arg' back off the stack and call __thread_entry.
pop {r0, r1}
# __thread_entry also needs our stack pointer.
mov r2, sp
b __thread_entry
END(__pthread_clone)
#
# This function is defined as:
#
# pid_t __bionic_clone( int flags, void *child_stack,
# pid_t *pid, void *tls, pid_t *ctid,
# int (*fn)(void *), void* arg );
#
# NOTE: This is not the same signature as the glibc
# __clone function. Placing 'fn' and 'arg'
# at the end of the parameter list makes the
# implementation much simpler.
#
// pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg);
ENTRY(__bionic_clone)
mov ip, sp
.save {r4, r5, r6, r7}

View File

@ -30,7 +30,7 @@
.text
// void _exit_with_stack_teardown(void * stackBase, size_t stackSize, int status)
// void _exit_with_stack_teardown(void* stackBase, size_t stackSize, int status)
.type _exit_with_stack_teardown, @function
.global _exit_with_stack_teardown
@ -40,12 +40,11 @@ _exit_with_stack_teardown:
move $s0,$a2 /* preserve status for exit() call */
li $v0,__NR_munmap
syscall /* the stack is destroyed by this call */
syscall
// If munmap failed, we ignore the failure and exit anyway.
move $a0,$s0
li $v0,__NR_exit
syscall
/* exit() should never return, cause a crash if it does */
move $a0,$0
lw $a0,($a0)
// The exit syscall does not return.
.end _exit_with_stack_teardown

View File

@ -30,78 +30,7 @@
#include <linux/errno.h>
#include <linux/sched.h>
.text
.type __pthread_clone, @function
.global __pthread_clone
.align 4
.ent __pthread_clone
/*
* int __pthread_clone(void* (*fn)(void*), void *child_stack,
* int flags, void *arg);
*/
__pthread_clone:
.set noreorder
.cpload $t9
.set reorder
# set up child stack
subu $a1,16
sw $a0,0($a1) # fn
sw $a3,4($a1) # arg
# sw $a1+16,8($a1) # tls
/*
* int sys_clone(int flags, void *child_stack, int *parent_tidptr,
* struct user_desc *newtls, int *child_tidptr);
*/
move $a0,$a2 # flags
# move $a1,$a1 # child_stack
move $a2,$0 # parent_tidptr
move $a3,$0 # user_desc
and $a0,~(CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
# make sure the kernel doesn't access child_tidptr
li $v0,__NR_clone
syscall
bnez $a3,.L__error
beqz $v0,.L__thread_start
j $ra
.L__thread_start:
lw $a0,0($sp) # fn
lw $a1,4($sp) # arg
addu $a2,$sp,16 # tls
# void __thread_entry(void* (*func)(void*), void *arg, void *tls)
la $t9, __thread_entry
j $t9
.L__error:
move $a0,$v0
la $t9,__set_errno
j $t9
.end __pthread_clone
#
# This function is defined as:
#
# pid_t __bionic_clone( int flags, void *child_stack,
# pid_t *pid, void *tls, pid_t *ctid,
# int (*fn)(void *), void* arg );
#
# NOTE: This is not the same signature than the GLibc
# __clone function here !! Placing 'fn' and 'arg'
# at the end of the parameter list makes the
# implementation much simpler.
#
// pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg);
.text
.type __bionic_clone, @function
.global __bionic_clone

View File

@ -6,17 +6,15 @@ ENTRY(_exit_with_stack_teardown)
// We can trash %ebx here since this call should never return.
// We can also take advantage of the fact that the linux syscall trap
// handler saves all the registers, so we don't need a stack to keep
// the status argument for exit while doing the munmap */
// the status argument for exit while doing the munmap.
mov 4(%esp), %ebx // stackBase
mov 8(%esp), %ecx // stackSize
mov $__NR_munmap, %eax
int $0x80
// If munmap failed, we ignore the failure and exit anyway.
mov %edx, %ebx // status
movl $__NR_exit, %eax
int $0x80
// The exit syscall does not return.
END(_exit_with_stack_teardown)

View File

@ -1,68 +1,7 @@
#include <asm/unistd.h>
#include <machine/asm.h>
// int __pthread_clone(void* (*fn)(void*), void* tls, int flags, void* arg);
ENTRY(__pthread_clone)
pushl %ebx
pushl %ecx
movl 16(%esp), %ecx
# save tls
movl %ecx, %ebx
# 16-byte alignment on child stack
andl $~15, %ecx
# insert arguments onto the child stack
movl 12(%esp), %eax
movl %eax, -16(%ecx)
movl 24(%esp), %eax
movl %eax, -12(%ecx)
movl %ebx, -8(%ecx)
subl $16, %ecx
movl 20(%esp), %ebx
# make system call
movl $__NR_clone, %eax
int $0x80
cmpl $0, %eax
je pc_child
jg pc_parent
# an error occurred, set errno and return -1
negl %eax
pushl %eax
call __set_errno
addl $4, %esp
orl $-1, %eax
jmp pc_return
pc_child:
# we're in the child thread now, call __thread_entry
# with the appropriate arguments on the child stack
# we already placed most of them
call __thread_entry
hlt
pc_parent:
# we're the parent; nothing to do.
pc_return:
popl %ecx
popl %ebx
ret
END(__pthread_clone)
/*
* int __bionic_clone(unsigned long clone_flags,
* void* newsp,
* int *parent_tidptr,
* void *new_tls,
* int *child_tidptr,
* int (*fn)(void *),
* void *arg);
*/
// pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg);
ENTRY(__bionic_clone)
pushl %ebx
pushl %esi

View File

@ -36,12 +36,10 @@ ENTRY(_exit_with_stack_teardown)
// the status argument for exit(2) while doing the munmap(2).
mov $__NR_munmap, %eax
syscall
// If munmap failed, ignore the failure and exit anyway.
// If munmap failed, we ignore the failure and exit anyway.
mov %rdx, %rdi // status
mov $__NR_exit, %eax
syscall
// The exit syscall does not return.
END(_exit_with_stack_teardown)

View File

@ -29,53 +29,7 @@
#include <asm/unistd.h>
#include <machine/asm.h>
// int __pthread_clone(void* (*fn)(void*), void* tls, int flags, void* arg);
ENTRY(__pthread_clone)
# Save tls.
movq %rsi, %r11
# Enforce 16-byte alignment for child stack.
andq $~15, %rsi
# Copy 'fn', 'arg', and 'tls' onto the child stack.
movq %rdi, -32(%rsi) # fn
movq %rcx, -24(%rsi) # arg
movq %r11, -16(%rsi) # tls
subq $32, %rsi
movq %rdx, %rdi
movl $__NR_clone, %eax
syscall
testl %eax, %eax
jns 1f
# An error occurred, set errno and return -1.
negl %eax
movl %eax, %edi
call __set_errno
orl $-1, %eax
jmp 2f
1:
jnz 2f
# We're in the child now, so call __thread_entry
# with the arguments from the child stack moved into
# the appropriate registers. We avoid pop here to keep
# the required 16-byte stack alignment.
movq (%rsp), %rdi # fn
movq 8(%rsp), %rsi # arg
movq 16(%rsp), %rdx # tls
call __thread_entry
hlt
2:
ret
// int __bionic_clone(unsigned long clone_flags,
// void* new_sp,
// int* parent_tid_ptr,
// void* new_tls,
// int* child_tid_ptr,
// int (*fn)(void*),
// void* arg);
// pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg);
ENTRY(__bionic_clone)
# Enforce 16-byte alignment for child stack.
andq $~15, %rsi

View File

@ -32,26 +32,28 @@
#include "private/bionic_tls.h"
// This trampoline is called from the assembly _pthread_clone function.
// Our 'tls' and __pthread_clone's 'child_stack' are one and the same, just growing in
// opposite directions.
extern "C" void __thread_entry(void* (*func)(void*), void* arg, void** tls) {
// This trampoline is called from the assembly __bionic_clone function.
int __thread_entry(void* arg) {
pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);
// Wait for our creating thread to release us. This lets it have time to
// notify gdb about this thread before we start doing anything.
// This also provides the memory barrier needed to ensure that all memory
// accesses previously made by the creating thread are visible to us.
pthread_mutex_t* start_mutex = (pthread_mutex_t*) &tls[TLS_SLOT_SELF];
pthread_mutex_t* start_mutex = (pthread_mutex_t*) &thread->tls[TLS_SLOT_START_MUTEX];
pthread_mutex_lock(start_mutex);
pthread_mutex_destroy(start_mutex);
pthread_internal_t* thread = (pthread_internal_t*) tls[TLS_SLOT_THREAD_ID];
thread->tls = tls;
__init_tls(thread);
__init_alternate_signal_stack(thread);
if ((thread->internal_flags & PTHREAD_INTERNAL_FLAG_THREAD_INIT_FAILED) != 0) {
pthread_exit(NULL);
}
void* result = func(arg);
void* result = thread->start_routine(thread->start_routine_arg);
pthread_exit(result);
return 0;
}

View File

@ -31,14 +31,7 @@
#include <stdarg.h>
#include <stdio.h>
extern int __bionic_clone(unsigned long clone_flags,
void* newsp,
int *parent_tidptr,
void *new_tls,
int *child_tidptr,
int (*fn)(void *),
void *arg);
extern pid_t __bionic_clone(uint32_t flags, void* child_stack, int* parent_tid, void* tls, int* child_tid, int (*fn)(void*), void* arg);
extern void __exit(int status);
/* this function is called from the __bionic_clone

View File

@ -49,6 +49,7 @@
extern "C" abort_msg_t** __abort_message_ptr;
extern "C" uintptr_t __get_sp(void);
extern "C" int __system_properties_init(void);
extern "C" int __set_tls(void* ptr);
// Not public, but well-known in the BSDs.
const char* __progname;
@ -96,7 +97,10 @@ void __libc_init_tls(KernelArgumentBlock& args) {
pthread_attr_setstack(&thread.attr, (void*) stack_bottom, stack_size);
_init_thread(&thread, false);
__init_tls(&thread);
__set_tls(thread.tls);
tls[TLS_SLOT_BIONIC_PREINIT] = &args;
__init_alternate_signal_stack(&thread);
}
void __libc_init_common(KernelArgumentBlock& args) {

View File

@ -40,7 +40,7 @@
#include "private/ErrnoRestorer.h"
#include "private/ScopedPthreadMutexLocker.h"
extern "C" int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, void* arg);
extern "C" pid_t __bionic_clone(uint32_t flags, void* child_stack, int* parent_tid, void* tls, int* child_tid, int (*fn)(void*), void* arg);
#ifdef __i386__
#define ATTRIBUTES __attribute__((noinline)) __attribute__((fastcall))
@ -50,15 +50,14 @@ extern "C" int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags,
extern "C" void ATTRIBUTES _thread_created_hook(pid_t thread_id);
extern "C" int __set_tls(void* ptr);
static pthread_mutex_t gPthreadStackCreationLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t gDebuggerNotificationLock = PTHREAD_MUTEX_INITIALIZER;
// This code is used both by each new pthread and the code that initializes the main thread.
void __init_tls(pthread_internal_t* thread) {
// Zero-initialize all the slots.
for (size_t i = 0; i < BIONIC_TLS_SLOTS; ++i) {
// Zero-initialize all the slots after TLS_SLOT_SELF and TLS_SLOT_THREAD_ID.
for (size_t i = TLS_SLOT_ERRNO; i < BIONIC_TLS_SLOTS; ++i) {
thread->tls[i] = NULL;
}
@ -67,11 +66,10 @@ void __init_tls(pthread_internal_t* thread) {
thread->tls[TLS_SLOT_THREAD_ID] = thread;
// GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
thread->tls[TLS_SLOT_STACK_GUARD] = (void*) __stack_chk_guard;
}
__set_tls(thread->tls);
void __init_alternate_signal_stack(pthread_internal_t* thread) {
// Create and set an alternate signal stack.
// This must happen after __set_tls, in case a system call fails and tries to set errno.
stack_t ss;
ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
if (ss.ss_sp != MAP_FAILED) {
@ -181,24 +179,26 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
// The child stack is the same address, just growing in the opposite direction.
// At offsets >= 0, we have the TLS slots.
// At offsets < 0, we have the child stack.
void** tls = (void**)((uint8_t*)(thread->attr.stack_base) + thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
void* child_stack = tls;
thread->tls = (void**)((uint8_t*)(thread->attr.stack_base) + thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
void* child_stack = thread->tls;
// Create a mutex for the thread in TLS_SLOT_SELF to wait on once it starts so we can keep
// Create a mutex for the thread in TLS to wait on once it starts so we can keep
// it from doing anything until after we notify the debugger about it
//
// This also provides the memory barrier we need to ensure that all
// memory accesses previously performed by this thread are visible to
// the new thread.
pthread_mutex_t* start_mutex = (pthread_mutex_t*) &tls[TLS_SLOT_SELF];
pthread_mutex_t* start_mutex = (pthread_mutex_t*) &thread->tls[TLS_SLOT_START_MUTEX];
pthread_mutex_init(start_mutex, NULL);
ScopedPthreadMutexLocker start_locker(start_mutex);
tls[TLS_SLOT_THREAD_ID] = thread;
thread->tls[TLS_SLOT_THREAD_ID] = thread;
int flags = CLONE_FILES | CLONE_FS | CLONE_VM | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM;
thread->start_routine = start_routine;
thread->start_routine_arg = arg;
int tid = __pthread_clone(start_routine, child_stack, flags, arg);
int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS;
int tid = __bionic_clone(flags, child_stack, NULL, thread->tls, NULL, __thread_entry, thread);
if (tid < 0) {
int clone_errno = errno;
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) == 0) {

View File

@ -42,6 +42,9 @@ struct pthread_internal_t {
__pthread_cleanup_t* cleanup_stack;
void** tls; /* thread-local storage area */
void* (*start_routine)(void*);
void* start_routine_arg;
void* alternate_signal_stack;
/*
@ -52,8 +55,12 @@ struct pthread_internal_t {
char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
};
extern "C" {
__LIBC_HIDDEN__ int __thread_entry(void* arg); // Called from assembler.
}
__LIBC_HIDDEN__ int _init_thread(pthread_internal_t* thread, bool add_to_thread_list);
__LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread);
__LIBC_HIDDEN__ void __init_alternate_signal_stack(pthread_internal_t*);
__LIBC_HIDDEN__ void _pthread_internal_add(pthread_internal_t* thread);
__LIBC_HIDDEN__ pthread_internal_t* __get_thread(void);

View File

@ -51,6 +51,9 @@ enum {
TLS_SLOT_THREAD_ID,
TLS_SLOT_ERRNO,
/* This slot is used when starting a new thread, before any code that needs errno runs. */
TLS_SLOT_START_MUTEX = TLS_SLOT_ERRNO,
/* These two aren't used by bionic itself, but allow the graphics code to
* access TLS directly rather than using the pthread API. */
TLS_SLOT_OPENGL_API = 3,

View File

@ -20,6 +20,7 @@
#include <inttypes.h>
#include <limits.h>
#include <pthread.h>
#include <sys/mman.h>
#include <unistd.h>
TEST(pthread, pthread_key_create) {
@ -214,11 +215,12 @@ TEST(pthread, pthread_sigmask) {
}
#if __BIONIC__
extern "C" int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, void* arg);
TEST(pthread, __pthread_clone) {
extern "C" pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg);
TEST(pthread, __bionic_clone) {
// Check that our hand-written clone assembler sets errno correctly on failure.
uintptr_t fake_child_stack[16];
errno = 0;
ASSERT_EQ(-1, __pthread_clone(NULL, &fake_child_stack[0], CLONE_THREAD, NULL));
ASSERT_EQ(-1, __bionic_clone(CLONE_THREAD, &fake_child_stack[0], NULL, NULL, NULL, NULL, NULL));
ASSERT_EQ(EINVAL, errno);
}
#endif
@ -373,6 +375,24 @@ TEST(pthread, pthread_join__multijoin) {
ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(join_result));
}
TEST(pthread, pthread_join__race) {
// http://b/11693195 --- pthread_join could return before the thread had actually exited.
// If the joiner unmapped the thread's stack, that could lead to SIGSEGV in the thread.
for (size_t i = 0; i < 1024; ++i) {
size_t stack_size = 64*1024;
void* stack = mmap(NULL, stack_size, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
pthread_attr_t a;
pthread_attr_init(&a);
pthread_attr_setstack(&a, stack, stack_size);
pthread_t t;
ASSERT_EQ(0, pthread_create(&t, &a, IdFn, NULL));
ASSERT_EQ(0, pthread_join(t, NULL));
ASSERT_EQ(0, munmap(stack, stack_size));
}
}
static void* GetActualGuardSizeFn(void* arg) {
pthread_attr_t attributes;
pthread_getattr_np(pthread_self(), &attributes);