crypto/modes/asm/aesni-gcm-x86_64.pl: minor optimization.
Avoid occasional performance drops of up to 8% by adjusting the stack frame so that it does not alias with the key.
This commit is contained in:
parent
72a158703b
commit
7a1a12232a
@ -21,8 +21,8 @@
|
|||||||
# justify. This module is based on combination of Intel submissions,
|
# justify. This module is based on combination of Intel submissions,
|
||||||
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
|
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
|
||||||
# Locktyukhin of Intel Corp. who verified that it reduces shuffles
|
# Locktyukhin of Intel Corp. who verified that it reduces shuffles
|
||||||
# pressure with notable relative improvement on upcoming Haswell
|
# pressure with notable relative improvement, achieving 1.0 cycle per
|
||||||
# processor. [Exact performance numbers to be added at launch.]
|
# byte processed with 128-bit key on Haswell processor.
|
||||||
#
|
#
|
||||||
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
|
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
|
||||||
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
|
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
|
||||||
@ -422,17 +422,28 @@ $code.=<<___;
|
|||||||
vzeroupper
|
vzeroupper
|
||||||
|
|
||||||
vmovdqu ($ivp),$T1 # input counter value
|
vmovdqu ($ivp),$T1 # input counter value
|
||||||
sub \$128,%rsp
|
add \$-128,%rsp
|
||||||
mov 12($ivp),$counter
|
mov 12($ivp),$counter
|
||||||
lea .Lbswap_mask(%rip),$const
|
lea .Lbswap_mask(%rip),$const
|
||||||
|
lea -0x80($key),$in0 # borrow $in0
|
||||||
|
mov \$0xf80,$end0 # borrow $end0
|
||||||
vmovdqu ($Xip),$Xi # load Xi
|
vmovdqu ($Xip),$Xi # load Xi
|
||||||
and \$-64,%rsp # ensure stack alignment
|
and \$-128,%rsp # ensure stack alignment
|
||||||
vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask
|
vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask
|
||||||
lea 0x80($key),$key # size optimization
|
lea 0x80($key),$key # size optimization
|
||||||
lea 0x20+0x20($Xip),$Xip # size optimization
|
lea 0x20+0x20($Xip),$Xip # size optimization
|
||||||
mov 0xf0-0x80($key),$rounds
|
mov 0xf0-0x80($key),$rounds
|
||||||
vpshufb $Ii,$Xi,$Xi
|
vpshufb $Ii,$Xi,$Xi
|
||||||
|
|
||||||
|
and $end0,$in0
|
||||||
|
and %rsp,$end0
|
||||||
|
sub $in0,$end0
|
||||||
|
jc .Ldec_no_key_aliasing
|
||||||
|
cmp \$768,$end0
|
||||||
|
jnc .Ldec_no_key_aliasing
|
||||||
|
sub $end0,%rsp # avoid aliasing with key
|
||||||
|
.Ldec_no_key_aliasing:
|
||||||
|
|
||||||
vmovdqu 0x50($inp),$Z3 # I[5]
|
vmovdqu 0x50($inp),$Z3 # I[5]
|
||||||
lea ($inp),$in0
|
lea ($inp),$in0
|
||||||
vmovdqu 0x40($inp),$Z0
|
vmovdqu 0x40($inp),$Z0
|
||||||
@ -621,14 +632,25 @@ $code.=<<___;
|
|||||||
vzeroupper
|
vzeroupper
|
||||||
|
|
||||||
vmovdqu ($ivp),$T1 # input counter value
|
vmovdqu ($ivp),$T1 # input counter value
|
||||||
sub \$128,%rsp
|
add \$-128,%rsp
|
||||||
mov 12($ivp),$counter
|
mov 12($ivp),$counter
|
||||||
lea .Lbswap_mask(%rip),$const
|
lea .Lbswap_mask(%rip),$const
|
||||||
|
lea -0x80($key),$in0 # borrow $in0
|
||||||
|
mov \$0xf80,$end0 # borrow $end0
|
||||||
lea 0x80($key),$key # size optimization
|
lea 0x80($key),$key # size optimization
|
||||||
vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask
|
vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask
|
||||||
and \$-64,%rsp # ensure stack alignment
|
and \$-128,%rsp # ensure stack alignment
|
||||||
mov 0xf0-0x80($key),$rounds
|
mov 0xf0-0x80($key),$rounds
|
||||||
|
|
||||||
|
and $end0,$in0
|
||||||
|
and %rsp,$end0
|
||||||
|
sub $in0,$end0
|
||||||
|
jc .Lenc_no_key_aliasing
|
||||||
|
cmp \$768,$end0
|
||||||
|
jnc .Lenc_no_key_aliasing
|
||||||
|
sub $end0,%rsp # avoid aliasing with key
|
||||||
|
.Lenc_no_key_aliasing:
|
||||||
|
|
||||||
lea ($out),$in0
|
lea ($out),$in0
|
||||||
lea -0xc0($out,$len),$end0
|
lea -0xc0($out,$len),$end0
|
||||||
shr \$4,$len
|
shr \$4,$len
|
||||||
|
Loading…
x
Reference in New Issue
Block a user