bn/asm/x86[_64]-mont*.pl: complement alloca with page-walking.
Some OSes, *cough*-dows, insist on the stack being "wired" to physical memory in a strictly sequential manner, i.e. if a stack allocation spans two pages, then a reference to the farthest one can be punishable by SEGV. But page walking can do good even on other OSes, because it guarantees that a villain thread hits the guard page before it can do damage to an innocent one... Reviewed-by: Rich Salz <rsalz@openssl.org>
This commit is contained in:
parent
56cd71b46e
commit
adc4f1fc25
@ -85,6 +85,21 @@ $frame=32; # size of above frame rounded up to 16n
|
||||
|
||||
&and ("esp",-64); # align to cache line
|
||||
|
||||
# Some OSes, *cough*-dows, insist on stack being "wired" to
|
||||
# physical memory in strictly sequential manner, i.e. if stack
|
||||
# allocation spans two pages, then reference to farmost one can
|
||||
# be punishable by SEGV. But page walking can do good even on
|
||||
# other OSes, because it guarantees that villain thread hits
|
||||
# the guard page before it can make damage to innocent one...
|
||||
&mov ("eax","ebp");
|
||||
&sub ("eax","esp");
|
||||
&and ("eax",-4096);
|
||||
&set_label("page_walk");
|
||||
&mov ("edx",&DWP(0,"esp","eax"));
|
||||
&sub ("eax",4096);
|
||||
&data_byte(0x2e);
|
||||
&jnc (&label("page_walk"));
|
||||
|
||||
################################# load argument block...
|
||||
&mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
|
||||
&mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
|
||||
|
@ -130,6 +130,20 @@ $code.=<<___;
|
||||
|
||||
mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
|
||||
.Lmul_body:
|
||||
# Some OSes, *cough*-dows, insist on stack being "wired" to
|
||||
# physical memory in strictly sequential manner, i.e. if stack
|
||||
# allocation spans two pages, then reference to farmost one can
|
||||
# be punishable by SEGV. But page walking can do good even on
|
||||
# other OSes, because it guarantees that villain thread hits
|
||||
# the guard page before it can make damage to innocent one...
|
||||
sub %rsp,%r11
|
||||
and \$-4096,%r11
|
||||
.Lmul_page_walk:
|
||||
mov (%rsp,%r11),%r10
|
||||
sub \$4096,%r11
|
||||
.byte 0x66,0x2e # predict non-taken
|
||||
jnc .Lmul_page_walk
|
||||
|
||||
mov $bp,%r12 # reassign $bp
|
||||
___
|
||||
$bp="%r12";
|
||||
@ -342,6 +356,14 @@ $code.=<<___;
|
||||
|
||||
mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
|
||||
.Lmul4x_body:
|
||||
sub %rsp,%r11
|
||||
and \$-4096,%r11
|
||||
.Lmul4x_page_walk:
|
||||
mov (%rsp,%r11),%r10
|
||||
sub \$4096,%r11
|
||||
.byte 0x2e # predict non-taken
|
||||
jnc .Lmul4x_page_walk
|
||||
|
||||
mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
|
||||
mov %rdx,%r12 # reassign $bp
|
||||
___
|
||||
@ -795,6 +817,15 @@ bn_sqr8x_mont:
|
||||
sub %r11,%rsp
|
||||
.Lsqr8x_sp_done:
|
||||
and \$-64,%rsp
|
||||
mov %rax,%r11
|
||||
sub %rsp,%r11
|
||||
and \$-4096,%r11
|
||||
.Lsqr8x_page_walk:
|
||||
mov (%rsp,%r11),%r10
|
||||
sub \$4096,%r11
|
||||
.byte 0x2e # predict non-taken
|
||||
jnc .Lsqr8x_page_walk
|
||||
|
||||
mov $num,%r10
|
||||
neg $num
|
||||
|
||||
@ -932,8 +963,17 @@ bn_mulx4x_mont:
|
||||
sub $num,%r10 # -$num
|
||||
mov ($n0),$n0 # *n0
|
||||
lea -72(%rsp,%r10),%rsp # alloca(frame+$num+8)
|
||||
lea ($bp,$num),%r10
|
||||
and \$-128,%rsp
|
||||
mov %rax,%r11
|
||||
sub %rsp,%r11
|
||||
and \$-4096,%r11
|
||||
.Lmulx4x_page_walk:
|
||||
mov (%rsp,%r11),%r10
|
||||
sub \$4096,%r11
|
||||
.byte 0x66,0x2e # predict non-taken
|
||||
jnc .Lmulx4x_page_walk
|
||||
|
||||
lea ($bp,$num),%r10
|
||||
##############################################################
|
||||
# Stack layout
|
||||
# +0 num
|
||||
|
@ -115,6 +115,20 @@ $code.=<<___;
|
||||
|
||||
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
|
||||
.Lmul_body:
|
||||
# Some OSes, *cough*-dows, insist on stack being "wired" to
|
||||
# physical memory in strictly sequential manner, i.e. if stack
|
||||
# allocation spans two pages, then reference to farmost one can
|
||||
# be punishable by SEGV. But page walking can do good even on
|
||||
# other OSes, because it guarantees that villain thread hits
|
||||
# the guard page before it can make damage to innocent one...
|
||||
sub %rsp,%rax
|
||||
and \$-4096,%rax
|
||||
.Lmul_page_walk:
|
||||
mov (%rsp,%rax),%r11
|
||||
sub \$4096,%rax
|
||||
.byte 0x2e # predict non-taken
|
||||
jnc .Lmul_page_walk
|
||||
|
||||
lea 128($bp),%r12 # reassign $bp (+size optimization)
|
||||
___
|
||||
$bp="%r12";
|
||||
@ -469,6 +483,15 @@ $code.=<<___;
|
||||
sub %r11,%rsp
|
||||
.Lmul4xsp_done:
|
||||
and \$-64,%rsp
|
||||
mov %rax,%r11
|
||||
sub %rsp,%r11
|
||||
and \$-4096,%r11
|
||||
.Lmul4x_page_walk:
|
||||
mov (%rsp,%r11),%r10
|
||||
sub \$4096,%r11
|
||||
.byte 0x2e # predict non-taken
|
||||
jnc .Lmul4x_page_walk
|
||||
|
||||
neg $num
|
||||
|
||||
mov %rax,40(%rsp)
|
||||
@ -1058,6 +1081,15 @@ $code.=<<___;
|
||||
sub %r11,%rsp
|
||||
.Lpwr_sp_done:
|
||||
and \$-64,%rsp
|
||||
mov %rax,%r11
|
||||
sub %rsp,%r11
|
||||
and \$-4096,%r11
|
||||
.Lpwr_page_walk:
|
||||
mov (%rsp,%r11),%r10
|
||||
sub \$4096,%r11
|
||||
.byte 0x2e # predict non-taken
|
||||
jnc .Lpwr_page_walk
|
||||
|
||||
mov $num,%r10
|
||||
neg $num
|
||||
|
||||
@ -2028,7 +2060,16 @@ bn_from_mont8x:
|
||||
sub %r11,%rsp
|
||||
.Lfrom_sp_done:
|
||||
and \$-64,%rsp
|
||||
mov $num,%r10
|
||||
mov %rax,%r11
|
||||
sub %rsp,%r11
|
||||
and \$-4096,%r11
|
||||
.Lfrom_page_walk:
|
||||
mov (%rsp,%r11),%r10
|
||||
sub \$4096,%r11
|
||||
.byte 0x2e # predict non-taken
|
||||
jnc .Lfrom_page_walk
|
||||
|
||||
mov $num,%r10
|
||||
neg $num
|
||||
|
||||
##############################################################
|
||||
@ -2173,6 +2214,15 @@ bn_mulx4x_mont_gather5:
|
||||
sub %r11,%rsp
|
||||
.Lmulx4xsp_done:
|
||||
and \$-64,%rsp # ensure alignment
|
||||
mov %rax,%r11
|
||||
sub %rsp,%r11
|
||||
and \$-4096,%r11
|
||||
.Lmulx4x_page_walk:
|
||||
mov (%rsp,%r11),%r10
|
||||
sub \$4096,%r11
|
||||
.byte 0x2e # predict non-taken
|
||||
jnc .Lmulx4x_page_walk
|
||||
|
||||
##############################################################
|
||||
# Stack layout
|
||||
# +0 -num
|
||||
@ -2619,6 +2669,15 @@ bn_powerx5:
|
||||
sub %r11,%rsp
|
||||
.Lpwrx_sp_done:
|
||||
and \$-64,%rsp
|
||||
mov %rax,%r11
|
||||
sub %rsp,%r11
|
||||
and \$-4096,%r11
|
||||
.Lpwrx_page_walk:
|
||||
mov (%rsp,%r11),%r10
|
||||
sub \$4096,%r11
|
||||
.byte 0x2e # predict non-taken
|
||||
jnc .Lpwrx_page_walk
|
||||
|
||||
mov $num,%r10
|
||||
neg $num
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user