libc/x86: ensure the stack is 16-byte aligned when tasks are created
Currently the Renderscript sample code RsBalls crashes on x86 when SSE2
is enabled. The root cause is that the stack is not 16-byte aligned
from the beginning when processes/threads are created, so
RsBalls crashes when SSE2 instructions try to access variables
on the stack.
- For the thread created by fork():
Its stack alignment is determined by crtbegin_{dynamic, static}.S
- For the thread created by pthread_create():
Its stack alignment is determined by clone.S. __thread_entry() is
a standard C function. In order for its stack to be properly aligned
to 16 bytes, __thread_entry() needs the stack to have the following
layout when it is called:
layout #1 (correct)
--------------
| |
-------------- <--ESP (ECX - 20)
| ret EIP |
-------------- <--ECX - 16
| arg0 |
-------------- <--ECX - 12
| arg1 |
-------------- <--ECX - 8
| arg2 |
-------------- <--ECX - 4
| unused |
-------------- <--ECX (16-byte boundary)
But it currently has the following layout:
layout #2: (incorrect)
--------------
| |
-------------- <--ESP (ECX - 16)
| unused |
-------------- <--ECX - 12
| arg0 |
-------------- <--ECX - 8
| arg1 |
-------------- <--ECX - 4
| arg2 |
-------------- <--ECX (16-byte boundary)
Fixed in this patch.
Change-Id: Ibe01f64db14be14033c505d854c73033556ddaa8
Signed-off-by: Michael Liao <michael.liao@intel.com>
Signed-off-by: H.J. Lu <hongjiu.lu@intel.com>
Signed-off-by: Jack Ren <jack.ren@intel.com>
Signed-off-by: Bruce Beare <bruce.j.beare@intel.com>
This commit is contained in:
@@ -13,16 +13,21 @@ __pthread_clone:
|
|||||||
pushl %ebx
|
pushl %ebx
|
||||||
pushl %ecx
|
pushl %ecx
|
||||||
movl 16(%esp), %ecx
|
movl 16(%esp), %ecx
|
||||||
movl 20(%esp), %ebx
|
|
||||||
|
# save tls
|
||||||
|
movl %ecx, %ebx
|
||||||
|
# 16-byte alignment on child stack
|
||||||
|
andl $~15, %ecx
|
||||||
|
|
||||||
# insert arguments onto the child stack
|
# insert arguments onto the child stack
|
||||||
movl 12(%esp), %eax
|
movl 12(%esp), %eax
|
||||||
movl %eax, -12(%ecx)
|
movl %eax, -16(%ecx)
|
||||||
movl 24(%esp), %eax
|
movl 24(%esp), %eax
|
||||||
movl %eax, -8(%ecx)
|
movl %eax, -12(%ecx)
|
||||||
movl %ecx, -4(%ecx)
|
movl %ebx, -8(%ecx)
|
||||||
|
|
||||||
subl $16, %ecx
|
subl $16, %ecx
|
||||||
|
movl 20(%esp), %ebx
|
||||||
movl $__NR_clone, %eax
|
movl $__NR_clone, %eax
|
||||||
int $0x80
|
int $0x80
|
||||||
test %eax, %eax
|
test %eax, %eax
|
||||||
@@ -40,7 +45,7 @@ __pthread_clone:
|
|||||||
# we're in the child thread now, call __thread_entry
|
# we're in the child thread now, call __thread_entry
|
||||||
# with the appropriate arguments on the child stack
|
# with the appropriate arguments on the child stack
|
||||||
# we already placed most of them
|
# we already placed most of them
|
||||||
jmp __thread_entry
|
call __thread_entry
|
||||||
hlt
|
hlt
|
||||||
|
|
||||||
2:
|
2:
|
||||||
|
|||||||
@@ -51,6 +51,8 @@
|
|||||||
#
|
#
|
||||||
_start:
|
_start:
|
||||||
mov %esp, %eax
|
mov %esp, %eax
|
||||||
|
# before push arguments, align the stack to a 16 byte boundary
|
||||||
|
andl $~15, %esp
|
||||||
mov $1f, %edx
|
mov $1f, %edx
|
||||||
pushl %edx
|
pushl %edx
|
||||||
mov $0f, %edx
|
mov $0f, %edx
|
||||||
|
|||||||
@@ -51,6 +51,8 @@
|
|||||||
#
|
#
|
||||||
_start:
|
_start:
|
||||||
mov %esp, %eax
|
mov %esp, %eax
|
||||||
|
# before push arguments, align the stack to a 16 byte boundary
|
||||||
|
andl $~15, %esp
|
||||||
mov $1f, %edx
|
mov $1f, %edx
|
||||||
pushl %edx
|
pushl %edx
|
||||||
mov $0f, %edx
|
mov $0f, %edx
|
||||||
|
|||||||
Reference in New Issue
Block a user