sha256-586.pl: minor optimization, +0-2% on all CPUs, +7% on Westmere.

This commit is contained in:
Andy Polyakov 2011-09-17 12:57:33 +00:00
parent d2fd65f6f6
commit 7470276a25

View File

@@ -14,8 +14,8 @@
# Pentium PIII P4 AMD K8 Core2
# gcc 46 36 41 27 26
# icc 57 33 38 25 23
# x86 asm 40 30 35 20 20
# x86_64 asm(*) - - 21 15.8 16.5
# x86 asm 40 30 33 20 18
# x86_64 asm(*) - - 21 16 16
#
# (*) x86_64 assembler performance is presented for reference
# purposes.
@@ -48,20 +48,19 @@ sub BODY_00_15() {
my $in_16_63=shift;
&mov ("ecx",$E);
&add ($T,&DWP(4*(8+15+16-9),"esp")) if ($in_16_63); # T += X[-7]
&ror ("ecx",6);
&mov ("edi",$E);
&ror ("edi",11);
&add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2])
&ror ("ecx",25-11);
&mov ("esi",$Foff);
&xor ("ecx","edi");
&ror ("edi",25-11);
&xor ("ecx",$E);
&ror ("ecx",11-6);
&mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0]
&xor ("ecx","edi"); # Sigma1(e)
&xor ("ecx",$E);
&ror ("ecx",6); # Sigma1(e)
&mov ("edi",$Goff);
&add ($T,"ecx"); # T += Sigma1(e)
&mov ($Eoff,$E); # modulo-scheduled
&xor ("esi","edi");
&mov ($Eoff,$E); # modulo-scheduled
&mov ("ecx",$A);
&and ("esi",$E);
&mov ($E,$Doff); # e becomes d, which is e in next iteration
@@ -69,14 +68,14 @@ sub BODY_00_15() {
&mov ("edi",$A);
&add ($T,"esi"); # T += Ch(e,f,g)
&ror ("ecx",2);
&ror ("ecx",22-13);
&add ($T,$Hoff); # T += h
&ror ("edi",13);
&xor ("ecx",$A);
&ror ("ecx",13-2);
&mov ("esi",$Boff);
&xor ("ecx","edi");
&ror ("edi",22-13);
&xor ("ecx",$A);
&ror ("ecx",2); # Sigma0(a)
&add ($E,$T); # d += T
&xor ("ecx","edi"); # Sigma0(a)
&mov ("edi",$Coff);
&add ($T,"ecx"); # T += Sigma0(a)
@@ -168,23 +167,22 @@ sub BODY_00_15() {
&set_label("16_63",16);
&mov ("esi",$T);
&mov ("ecx",&DWP(4*(8+15+16-14),"esp"));
&shr ($T,3);
&ror ("esi",7);
&xor ($T,"esi");
&ror ("esi",18-7);
&mov ("edi","ecx");
&xor ($T,"esi"); # T = sigma0(X[-15])
&xor ("esi",$T);
&ror ("esi",7);
&shr ($T,3);
&shr ("ecx",10);
&mov ("esi",&DWP(4*(8+15+16),"esp"));
&ror ("edi",17);
&xor ("ecx","edi");
&ror ("edi",19-17);
&add ($T,"esi"); # T += X[-16]
&xor ("edi","ecx") # sigma1(X[-2])
&xor ($T,"esi"); # T = sigma0(X[-15])
&xor ("edi","ecx");
&ror ("edi",17);
&shr ("ecx",10);
&add ($T,&DWP(4*(8+15+16),"esp")); # T += X[-16]
&xor ("edi","ecx"); # sigma1(X[-2])
&add ($T,"edi"); # T += sigma1(X[-2])
# &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7], moved to BODY_00_15(1)
&add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7]
# &add ($T,"edi"); # T += sigma1(X[-2])
# &mov (&DWP(4*(8+15),"esp"),$T); # save X[0]
&BODY_00_15(1);