ghash-x86_64.pl: minor optimization.
This commit is contained in:
parent
2f0275a4c3
commit
fbf7c44bbf
@ -773,15 +773,11 @@ $code.=<<___;
|
|||||||
pxor $T1,$Xi # Ii+Xi
|
pxor $T1,$Xi # Ii+Xi
|
||||||
|
|
||||||
movdqa $Xln,$Xhn
|
movdqa $Xln,$Xhn
|
||||||
pshufd \$0b01001110,$Xln,$Xmn
|
pshufd \$0b01001110,$Xln,$T1
|
||||||
pxor $Xln,$Xmn
|
pxor $Xln,$T1
|
||||||
pclmulqdq \$0x00,$Hkey,$Xln
|
pclmulqdq \$0x00,$Hkey,$Xln
|
||||||
pclmulqdq \$0x11,$Hkey,$Xhn
|
pclmulqdq \$0x11,$Hkey,$Xhn
|
||||||
pclmulqdq \$0x00,$HK,$Xmn
|
pclmulqdq \$0x00,$HK,$T1
|
||||||
|
|
||||||
movdqa $Xi,$Xhi
|
|
||||||
pshufd \$0b01001110,$Xi,$T1 #
|
|
||||||
pxor $Xi,$T1 #
|
|
||||||
|
|
||||||
lea 32($inp),$inp # i+=2
|
lea 32($inp),$inp # i+=2
|
||||||
sub \$0x20,$len
|
sub \$0x20,$len
|
||||||
@ -790,30 +786,32 @@ $code.=<<___;
|
|||||||
|
|
||||||
.align 32
|
.align 32
|
||||||
.Lmod_loop:
|
.Lmod_loop:
|
||||||
|
movdqa $Xi,$Xhi
|
||||||
|
pshufd \$0b01001110,$Xi,$T2 #
|
||||||
|
pxor $Xi,$T2 #
|
||||||
|
|
||||||
pclmulqdq \$0x00,$Hkey2,$Xi
|
pclmulqdq \$0x00,$Hkey2,$Xi
|
||||||
pclmulqdq \$0x11,$Hkey2,$Xhi
|
pclmulqdq \$0x11,$Hkey2,$Xhi
|
||||||
movdqu ($inp),$T2 # Ii
|
pclmulqdq \$0x10,$HK,$T2
|
||||||
pclmulqdq \$0x10,$HK,$T1
|
|
||||||
pshufb $T3,$T2
|
|
||||||
|
|
||||||
pxor $Xln,$Xi # (H*Ii+1) + H^2*(Ii+Xi)
|
pxor $Xln,$Xi # (H*Ii+1) + H^2*(Ii+Xi)
|
||||||
movdqu 16($inp),$Xln # Ii+1
|
|
||||||
pxor $Xhn,$Xhi
|
pxor $Xhn,$Xhi
|
||||||
|
movdqu ($inp),$Xhn # Ii
|
||||||
|
pshufb $T3,$Xhn
|
||||||
|
movdqu 16($inp),$Xln # Ii+1
|
||||||
|
|
||||||
pxor $Xi,$Xmn # aggregated Karatsuba post-processing
|
pxor $Xi,$T1 # aggregated Karatsuba post-processing
|
||||||
pxor $Xhi,$Xmn
|
pxor $Xhi,$T1
|
||||||
pxor $T2,$Xhi # "Ii+Xi", consume early
|
pxor $Xhn,$Xhi # "Ii+Xi", consume early
|
||||||
pxor $Xmn,$T1
|
pxor $T1,$T2
|
||||||
pshufb $T3,$Xln
|
pshufb $T3,$Xln
|
||||||
movdqa $T1,$T2 #
|
movdqa $T2,$T1 #
|
||||||
psrldq \$8,$T1
|
psrldq \$8,$T1
|
||||||
pslldq \$8,$T2 #
|
pslldq \$8,$T2 #
|
||||||
pxor $T1,$Xhi
|
pxor $T1,$Xhi
|
||||||
pxor $T2,$Xi #
|
pxor $T2,$Xi #
|
||||||
|
|
||||||
movdqa $Xln,$Xhn #
|
movdqa $Xln,$Xhn #
|
||||||
pshufd \$0b01001110,$Xln,$Xmn
|
|
||||||
pxor $Xln,$Xmn #
|
|
||||||
|
|
||||||
movdqa $Xi,$T2 # 1st phase
|
movdqa $Xi,$T2 # 1st phase
|
||||||
movdqa $Xi,$T1
|
movdqa $Xi,$T1
|
||||||
@ -828,6 +826,8 @@ $code.=<<___;
|
|||||||
psrldq \$8,$T1 #
|
psrldq \$8,$T1 #
|
||||||
pxor $T2,$Xi
|
pxor $T2,$Xi
|
||||||
pxor $T1,$Xhi #
|
pxor $T1,$Xhi #
|
||||||
|
pshufd \$0b01001110,$Xhn,$T1
|
||||||
|
pxor $Xhn,$T1 #
|
||||||
|
|
||||||
pclmulqdq \$0x11,$Hkey,$Xhn #######
|
pclmulqdq \$0x11,$Hkey,$Xhn #######
|
||||||
movdqa $Xi,$T2 # 2nd phase
|
movdqa $Xi,$T2 # 2nd phase
|
||||||
@ -837,28 +837,28 @@ $code.=<<___;
|
|||||||
psrlq \$5,$Xi
|
psrlq \$5,$Xi
|
||||||
pxor $T2,$Xi #
|
pxor $T2,$Xi #
|
||||||
psrlq \$1,$Xi #
|
psrlq \$1,$Xi #
|
||||||
pclmulqdq \$0x00,$HK,$Xmn #######
|
pclmulqdq \$0x00,$HK,$T1 #######
|
||||||
pxor $Xhi,$Xi #
|
pxor $Xhi,$Xi #
|
||||||
|
|
||||||
movdqa $Xi,$Xhi
|
|
||||||
pshufd \$0b01001110,$Xi,$T1 #
|
|
||||||
pxor $Xi,$T1 #
|
|
||||||
|
|
||||||
lea 32($inp),$inp
|
lea 32($inp),$inp
|
||||||
sub \$0x20,$len
|
sub \$0x20,$len
|
||||||
ja .Lmod_loop
|
ja .Lmod_loop
|
||||||
|
|
||||||
.Leven_tail:
|
.Leven_tail:
|
||||||
|
movdqa $Xi,$Xhi
|
||||||
|
pshufd \$0b01001110,$Xi,$T2 #
|
||||||
|
pxor $Xi,$T2 #
|
||||||
|
|
||||||
pclmulqdq \$0x00,$Hkey2,$Xi
|
pclmulqdq \$0x00,$Hkey2,$Xi
|
||||||
pclmulqdq \$0x11,$Hkey2,$Xhi
|
pclmulqdq \$0x11,$Hkey2,$Xhi
|
||||||
pclmulqdq \$0x10,$HK,$T1
|
pclmulqdq \$0x10,$HK,$T2
|
||||||
|
|
||||||
pxor $Xln,$Xi # (H*Ii+1) + H^2*(Ii+Xi)
|
pxor $Xln,$Xi # (H*Ii+1) + H^2*(Ii+Xi)
|
||||||
pxor $Xhn,$Xhi
|
pxor $Xhn,$Xhi
|
||||||
pxor $Xi,$Xmn
|
pxor $Xi,$T1
|
||||||
pxor $Xhi,$Xmn
|
pxor $Xhi,$T1
|
||||||
pxor $Xmn,$T1
|
pxor $T1,$T2
|
||||||
movdqa $T1,$T2 #
|
movdqa $T2,$T1 #
|
||||||
psrldq \$8,$T1
|
psrldq \$8,$T1
|
||||||
pslldq \$8,$T2 #
|
pslldq \$8,$T2 #
|
||||||
pxor $T1,$Xhi
|
pxor $T1,$Xhi
|
||||||
|
Loading…
x
Reference in New Issue
Block a user