diff --git a/libc/arch-common/bionic/crtbegin.c b/libc/arch-common/bionic/crtbegin.c index bc67bfb2c..fa9f3f32b 100644 --- a/libc/arch-common/bionic/crtbegin.c +++ b/libc/arch-common/bionic/crtbegin.c @@ -50,6 +50,10 @@ void _start() { array.fini_array = &__FINI_ARRAY__; void* raw_args = (void*) ((uintptr_t) __builtin_frame_address(0) + sizeof(void*)); +#ifdef __x86_64__ + // 16-byte stack alignment is required by x86_64 ABI + asm("andq $~15, %rsp"); +#endif __libc_init(raw_args, NULL, &main, &array); } diff --git a/libc/arch-x86_64/bionic/clone.S b/libc/arch-x86_64/bionic/clone.S index 2ae0e851c..7511e8673 100644 --- a/libc/arch-x86_64/bionic/clone.S +++ b/libc/arch-x86_64/bionic/clone.S @@ -59,10 +59,11 @@ ENTRY(__pthread_clone) # We're in the child now, so call __thread_entry # with the arguments from the child stack moved into - # the appropriate registers. - popq %rdi # fn - popq %rsi # arg - popq %rdx # tls + # the appropriate registers. We avoid pop here to keep + # the required 16-byte stack alignment. + movq (%rsp), %rdi # fn + movq 8(%rsp), %rsi # arg + movq 16(%rsp), %rdx # tls call __thread_entry hlt 2: