From 0d236aa3f1e6d31b0c729448ae9d3ed1cad23fb4 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Fri, 9 May 2014 14:42:16 -0700 Subject: [PATCH] Align the child stack in clone(2). Also let clone(2) set the TLS for x86. Also ensure we initialize the TLS before we clone(2) for all architectures. Change-Id: Ie5fa4466e1c9ee116a281dfedef574c5ba60c0b5 --- libc/arch-arm64/bionic/__bionic_clone.S | 3 - libc/arch-x86/bionic/__bionic_clone.S | 14 ++-- libc/arch-x86/bionic/__set_tls.c | 99 +++++++++--------------- libc/arch-x86_64/bionic/__bionic_clone.S | 3 - libc/arch-x86_64/bionic/__set_tls.c | 2 - libc/bionic/clone.cpp | 5 ++ libc/bionic/pthread_create.cpp | 29 ++++--- 7 files changed, 61 insertions(+), 94 deletions(-) diff --git a/libc/arch-arm64/bionic/__bionic_clone.S b/libc/arch-arm64/bionic/__bionic_clone.S index 74db790d6..ddd8ee085 100644 --- a/libc/arch-arm64/bionic/__bionic_clone.S +++ b/libc/arch-arm64/bionic/__bionic_clone.S @@ -35,9 +35,6 @@ ENTRY(__bionic_clone) mov x29, sp str x8, [sp, #-16]! - # Align 'child_stack' to 16 bytes. - and x1, x1, #~0xf - # Copy 'fn' and 'arg' onto the child stack. stp x5, x6, [x1, #-16] diff --git a/libc/arch-x86/bionic/__bionic_clone.S b/libc/arch-x86/bionic/__bionic_clone.S index cb0a363c1..af6ef1907 100644 --- a/libc/arch-x86/bionic/__bionic_clone.S +++ b/libc/arch-x86/bionic/__bionic_clone.S @@ -6,9 +6,12 @@ ENTRY(__bionic_clone) pushl %esi pushl %edi - # Align 'child_stack' to 16 bytes. - movl 20(%esp), %ecx - andl $~15, %ecx + # Load system call arguments into registers. + movl 16(%esp), %ebx # flags + movl 20(%esp), %ecx # child_stack + movl 24(%esp), %edx # parent_tid + movl 28(%esp), %esi # tls + movl 32(%esp), %edi # child_tid # Copy 'fn' and 'arg' onto the child stack movl 36(%esp), %eax # Read 'fn'. @@ -19,11 +22,6 @@ ENTRY(__bionic_clone) # Make the system call. 
movl $__NR_clone, %eax - movl 16(%esp), %ebx # flags - #movl %ecx, %ecx # child stack (already there) - movl 24(%esp), %edx # parent_tid - movl 28(%esp), %esi # tls - movl 32(%esp), %edi # child_tid int $0x80 # Check result. diff --git a/libc/arch-x86/bionic/__set_tls.c b/libc/arch-x86/bionic/__set_tls.c index 7ed4b0152..722ec6f6c 100644 --- a/libc/arch-x86/bionic/__set_tls.c +++ b/libc/arch-x86/bionic/__set_tls.c @@ -25,77 +25,50 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ + +#include #include +#include +#include -struct user_desc { - unsigned int entry_number; - unsigned long base_addr; - unsigned int limit; - unsigned int seg_32bit:1; - unsigned int contents:2; - unsigned int read_exec_only:1; - unsigned int limit_in_pages:1; - unsigned int seg_not_present:1; - unsigned int useable:1; - unsigned int empty:25; -}; +extern int __set_thread_area(struct user_desc*); -extern int __set_thread_area(struct user_desc *u_info); +__LIBC_HIDDEN__ void __init_user_desc(struct user_desc* result, bool allocate, void* base_addr) { + if (allocate) { + // Let the kernel choose. + result->entry_number = -1; + } else { + // Get the existing entry number from %gs. 
+ uint32_t gs; + __asm__ __volatile__("movw %%gs, %w0" : "=q"(gs) /*output*/); + result->entry_number = (gs & 0xffff) >> 3; + } -/* the following can't be const, since the first call will - * update the 'entry_number' field - */ -static struct user_desc _tls_desc = -{ - -1, - 0, - 0x1000, - 1, - 0, - 0, - 1, - 0, - 1, - 0 -}; + result->base_addr = (uintptr_t) base_addr; -static pthread_mutex_t _tls_desc_lock = PTHREAD_MUTEX_INITIALIZER; + result->limit = PAGE_SIZE; -struct _thread_area_head { - void *self; -}; - -/* we implement thread local storage through the gs: segment descriptor - * we create a segment descriptor for the tls - */ -int __set_tls(void *ptr) -{ - int rc, segment; - - pthread_mutex_lock(&_tls_desc_lock); - _tls_desc.base_addr = (unsigned long)ptr; - - /* We also need to write the location of the tls to ptr[0] */ - ((struct _thread_area_head *)ptr)->self = ptr; - - rc = __set_thread_area( &_tls_desc ); - if (rc != 0) - { - /* could not set thread local area */ - pthread_mutex_unlock(&_tls_desc_lock); - return -1; - } - - /* this weird computation comes from GLibc */ - segment = _tls_desc.entry_number*8 + 3; - asm __volatile__ ( - " movw %w0, %%gs" :: "q"(segment) - ); - pthread_mutex_unlock(&_tls_desc_lock); - - return 0; + result->seg_32bit = 1; + result->contents = MODIFY_LDT_CONTENTS_DATA; + result->read_exec_only = 0; + result->limit_in_pages = 1; + result->seg_not_present = 0; + result->useable = 1; } +int __set_tls(void* ptr) { + struct user_desc tls_descriptor; + __init_user_desc(&tls_descriptor, true, ptr); + int rc = __set_thread_area(&tls_descriptor); + if (rc != -1) { + // Change %gs to be new GDT entry. 
+ uint16_t table_indicator = 0; // GDT + uint16_t rpl = 3; // Requested privilege level + uint16_t selector = (tls_descriptor.entry_number << 3) | table_indicator | rpl; + __asm__ __volatile__("movw %w0, %%gs" : /*output*/ : "q"(selector) /*input*/ : /*clobber*/); + } + return rc; +} diff --git a/libc/arch-x86_64/bionic/__bionic_clone.S b/libc/arch-x86_64/bionic/__bionic_clone.S index 62c9666e0..db7d05c28 100644 --- a/libc/arch-x86_64/bionic/__bionic_clone.S +++ b/libc/arch-x86_64/bionic/__bionic_clone.S @@ -30,9 +30,6 @@ // pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg); ENTRY(__bionic_clone) - # Align 'child_stack' to 16 bytes. - andq $~15, %rsi - # Copy 'fn' and 'arg' onto the child stack. movq %r9, -16(%rsi) # fn movq 8(%rsp), %rax # Read 'arg'. diff --git a/libc/arch-x86_64/bionic/__set_tls.c b/libc/arch-x86_64/bionic/__set_tls.c index 9a694494b..cc7a5f454 100644 --- a/libc/arch-x86_64/bionic/__set_tls.c +++ b/libc/arch-x86_64/bionic/__set_tls.c @@ -32,7 +32,5 @@ extern int __arch_prctl(int, unsigned long); int __set_tls(void* ptr) { - // We also need to write the location of the tls to ptr[0]. - *(void**) ptr = ptr; return __arch_prctl(ARCH_SET_FS, (uintptr_t) ptr); } diff --git a/libc/bionic/clone.cpp b/libc/bionic/clone.cpp index 4ea3c70ce..2c507c4e3 100644 --- a/libc/bionic/clone.cpp +++ b/libc/bionic/clone.cpp @@ -59,5 +59,10 @@ int clone(int (*fn)(void*), void* child_stack, int flags, void* arg, ...) { } va_end(args); + // Align 'child_stack' to 16 bytes. 
+ uintptr_t child_stack_addr = reinterpret_cast<uintptr_t>(child_stack); + child_stack_addr &= ~0xf; + child_stack = reinterpret_cast<void*>(child_stack_addr); + return __bionic_clone(flags, child_stack, parent_tid, new_tls, child_tid, fn, arg); } diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp index 3d0daf7c9..f62dc1500 100644 --- a/libc/bionic/pthread_create.cpp +++ b/libc/bionic/pthread_create.cpp @@ -39,9 +39,6 @@ #include "private/ErrnoRestorer.h" #include "private/ScopedPthreadMutexLocker.h" -extern "C" pid_t __bionic_clone(uint32_t flags, void* child_stack, int* parent_tid, void* tls, int* child_tid, int (*fn)(void*), void* arg); -extern "C" int __set_tls(void*); - // Used by gdb to track thread creation. See libthread_db. #ifdef __i386__ extern "C" __attribute__((noinline)) __attribute__((fastcall)) void _thread_created_hook(pid_t) {} @@ -49,6 +46,12 @@ extern "C" __attribute__((noinline)) __attribute__((fastcall)) void _thread_crea extern "C" __attribute__((noinline)) void _thread_created_hook(pid_t) {} #endif +// x86 uses segment descriptors rather than a direct pointer to TLS. +#if __i386__ +#include <asm/ldt.h> +extern "C" __LIBC_HIDDEN__ void __init_user_desc(struct user_desc*, int, void*); +#endif + static pthread_mutex_t gPthreadStackCreationLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t gDebuggerNotificationLock = PTHREAD_MUTEX_INITIALIZER; @@ -62,10 +65,6 @@ void __init_tls(pthread_internal_t* thread) { thread->tls[i] = NULL; } -#if defined(__i386__) - __set_tls(thread->tls); -#endif - // Slot 0 must point to itself. The x86 Linux kernel reads the TLS from %fs:0. 
thread->tls[TLS_SLOT_SELF] = thread->tls; thread->tls[TLS_SLOT_THREAD_ID] = thread; @@ -148,8 +147,7 @@ static int __pthread_start(void* arg) { pthread_mutex_t* start_mutex = (pthread_mutex_t*) &thread->tls[TLS_SLOT_START_MUTEX]; pthread_mutex_lock(start_mutex); pthread_mutex_destroy(start_mutex); - - __init_tls(thread); + thread->tls[TLS_SLOT_START_MUTEX] = NULL; __init_alternate_signal_stack(thread); @@ -208,6 +206,7 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr, // At offsets < 0, we have the child stack. thread->tls = (void**)((uint8_t*)(thread->attr.stack_base) + thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*)); void* child_stack = thread->tls; + __init_tls(thread); // Create a mutex for the thread in TLS to wait on once it starts so we can keep // it from doing anything until after we notify the debugger about it @@ -219,20 +218,20 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr, pthread_mutex_init(start_mutex, NULL); pthread_mutex_lock(start_mutex); - thread->tls[TLS_SLOT_THREAD_ID] = thread; - thread->start_routine = start_routine; thread->start_routine_arg = arg; int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID; + void* tls = thread->tls; #if defined(__i386__) // On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than - // a pointer to the TLS itself. Rather than try to deal with that here, we just let x86 set - // the TLS manually in __init_tls, like all architectures used to. - flags &= ~CLONE_SETTLS; + // a pointer to the TLS itself. 
+ user_desc tls_descriptor; + __init_user_desc(&tls_descriptor, false, tls); + tls = &tls_descriptor; #endif - int rc = __bionic_clone(flags, child_stack, &(thread->tid), thread->tls, &(thread->tid), __pthread_start, thread); + int rc = clone(__pthread_start, child_stack, flags, thread, &(thread->tid), tls, &(thread->tid)); if (rc == -1) { int clone_errno = errno; // We don't have to unlock the mutex at all because clone(2) failed so there's no child waiting to