From cb08204053a285951b1907ef14a832f16a1a4679 Mon Sep 17 00:00:00 2001
From: Jack Ren
Date: Wed, 21 Mar 2012 17:48:13 +0800
Subject: [PATCH] libc/x86: ensure the stack is 16-byte aligned when tasks are created

Currently the Renderscript sample code RsBalls crashes on x86 when SSE2 is
enabled. The root cause is that the stack was not 16-byte aligned from the
beginning when the processes/threads were created, so RsBalls crashed when
SSE2 instructions tried to access the variables on the stack.

- For the thread created by fork():
  Its stack alignment is determined by crtbegin_{dynamic, static}.S

- For the thread created by pthread_create():
  Its stack alignment is determined by clone.S. __thread_entry() is a
  standard C function. In order for its stack to be properly 16-byte
  aligned, __thread_entry() needs the stack to have the following layout
  when it is called:

  layout #1 (correct)

      --------------
      |            |
      -------------- <--ESP (ECX - 20)
      |  ret EIP   |
      -------------- <--ECX - 16
      |  arg0      |
      -------------- <--ECX - 12
      |  arg1      |
      -------------- <--ECX - 8
      |  arg2      |
      -------------- <--ECX - 4
      |  unused    |
      -------------- <--ECX (16-byte boundary)

  But it currently has the following layout:

  layout #2 (incorrect)

      --------------
      |            |
      -------------- <--ESP (ECX - 16)
      |  unused    |
      -------------- <--ECX - 12
      |  arg0      |
      -------------- <--ECX - 8
      |  arg1      |
      -------------- <--ECX - 4
      |  arg2      |
      -------------- <--ECX (16-byte boundary)

This is fixed in this patch.

Change-Id: Ibe01f64db14be14033c505d854c73033556ddaa8
Signed-off-by: Michael Liao
Signed-off-by: H.J.
Lu Signed-off-by: Jack Ren Signed-off-by: Bruce Beare --- libc/arch-x86/bionic/clone.S | 15 ++++++++++----- libc/arch-x86/bionic/crtbegin_dynamic.S | 2 ++ libc/arch-x86/bionic/crtbegin_static.S | 2 ++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/libc/arch-x86/bionic/clone.S b/libc/arch-x86/bionic/clone.S index 8abb7c84e..352d23c5c 100644 --- a/libc/arch-x86/bionic/clone.S +++ b/libc/arch-x86/bionic/clone.S @@ -13,16 +13,21 @@ __pthread_clone: pushl %ebx pushl %ecx movl 16(%esp), %ecx - movl 20(%esp), %ebx + + # save tls + movl %ecx, %ebx + # 16-byte alignment on child stack + andl $~15, %ecx # insert arguments onto the child stack movl 12(%esp), %eax - movl %eax, -12(%ecx) + movl %eax, -16(%ecx) movl 24(%esp), %eax - movl %eax, -8(%ecx) - movl %ecx, -4(%ecx) + movl %eax, -12(%ecx) + movl %ebx, -8(%ecx) subl $16, %ecx + movl 20(%esp), %ebx movl $__NR_clone, %eax int $0x80 test %eax, %eax @@ -40,7 +45,7 @@ __pthread_clone: # we're in the child thread now, call __thread_entry # with the appropriate arguments on the child stack # we already placed most of them - jmp __thread_entry + call __thread_entry hlt 2: diff --git a/libc/arch-x86/bionic/crtbegin_dynamic.S b/libc/arch-x86/bionic/crtbegin_dynamic.S index 9ba0d2f6c..177244b70 100644 --- a/libc/arch-x86/bionic/crtbegin_dynamic.S +++ b/libc/arch-x86/bionic/crtbegin_dynamic.S @@ -51,6 +51,8 @@ # _start: mov %esp, %eax + # before push arguments, align the stack to a 16 byte boundary + andl $~15, %esp mov $1f, %edx pushl %edx mov $0f, %edx diff --git a/libc/arch-x86/bionic/crtbegin_static.S b/libc/arch-x86/bionic/crtbegin_static.S index 8e7033017..4fffecdc8 100644 --- a/libc/arch-x86/bionic/crtbegin_static.S +++ b/libc/arch-x86/bionic/crtbegin_static.S @@ -51,6 +51,8 @@ # _start: mov %esp, %eax + # before push arguments, align the stack to a 16 byte boundary + andl $~15, %esp mov $1f, %edx pushl %edx mov $0f, %edx