md5/asm/md5-[586|x86_64].pl: +15% on Atom.
[MD5 is hardly relevant, just cleaning up repository]
This commit is contained in:
parent
496f2b148b
commit
b943b7d2c7
@ -56,14 +56,14 @@ sub R0
|
||||
&lea($a,&DWP($t,$a,$tmp2,1));
|
||||
|
||||
&xor($tmp1,$d); # F function - part 4
|
||||
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);
|
||||
|
||||
&add($a,$tmp1);
|
||||
&mov($tmp1,&Np($c)) if $pos < 1; # next tmp1 for R0
|
||||
&mov($tmp1,&Np($c)) if $pos == 1; # next tmp1 for R1
|
||||
|
||||
&rotl($a,$s);
|
||||
|
||||
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);
|
||||
&mov($tmp1,&Np($c)) if $pos < 1; # next tmp1 for R0
|
||||
&mov($tmp1,&Np($c)) if $pos == 1; # next tmp1 for R1
|
||||
|
||||
&add($a,$b);
|
||||
}
|
||||
@ -74,13 +74,12 @@ sub R1
|
||||
|
||||
&comment("R1 $ki");
|
||||
|
||||
&lea($a,&DWP($t,$a,$tmp2,1));
|
||||
|
||||
&xor($tmp1,$b); # G function - part 2
|
||||
&and($tmp1,$d); # G function - part 3
|
||||
&lea($a,&DWP($t,$a,$tmp2,1));
|
||||
|
||||
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);
|
||||
&xor($tmp1,$c); # G function - part 4
|
||||
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);
|
||||
|
||||
&add($a,$tmp1);
|
||||
&mov($tmp1,&Np($c)) if $pos < 1; # G function - part 1
|
||||
@ -108,10 +107,10 @@ if (($n & 1) == 0)
|
||||
&lea($a,&DWP($t,$a,$tmp2,1));
|
||||
|
||||
&add($a,$tmp1);
|
||||
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0));
|
||||
|
||||
&rotl($a,$s);
|
||||
|
||||
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0));
|
||||
&mov($tmp1,&Np($c));
|
||||
}
|
||||
else
|
||||
@ -120,11 +119,11 @@ else
|
||||
# make sure to do 'D' first, not 'B', else we clash with
|
||||
# the last add from the previous round.
|
||||
|
||||
&lea($a,&DWP($t,$a,$tmp2,1));
|
||||
|
||||
&add($b,$c); # MOVED FORWARD
|
||||
&xor($tmp1,$d); # H function - part 2
|
||||
|
||||
&lea($a,&DWP($t,$a,$tmp2,1));
|
||||
|
||||
&xor($tmp1,$b); # H function - part 3
|
||||
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);
|
||||
|
||||
|
@ -47,8 +47,8 @@ sub round2_step
|
||||
$code .= " mov %edx, %r12d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1);
|
||||
$code .= <<EOF;
|
||||
not %r11d /* not z */
|
||||
lea $T_i($dst,%r10d),$dst /* Const + dst + ... */
|
||||
and $x, %r12d /* x & z */
|
||||
lea $T_i($dst,%r10d),$dst /* Const + dst + ... */
|
||||
and $y, %r11d /* y & (not z) */
|
||||
mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */
|
||||
or %r11d, %r12d /* (y & (not z)) | (x & z) */
|
||||
@ -65,6 +65,7 @@ EOF
|
||||
# %r10d = X[k_next]
|
||||
# %r11d = y' (copy of y for the next step)
|
||||
# Each round3_step() takes about 4.2 clocks (8 instructions, 1.9 IPC)
|
||||
{ my $round3_alter=0;
|
||||
sub round3_step
|
||||
{
|
||||
my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
|
||||
@ -75,10 +76,20 @@ sub round3_step
|
||||
mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */
|
||||
xor $x, %r11d /* x ^ ... */
|
||||
add %r11d, $dst /* dst += ... */
|
||||
EOF
|
||||
$code .= <<EOF if ($round3_alter);
|
||||
rol \$$s, $dst /* dst <<< s */
|
||||
mov $x, %r11d /* (NEXT STEP) y' = $x */
|
||||
EOF
|
||||
$code .= <<EOF if (!$round3_alter);
|
||||
mov $x, %r11d /* (NEXT STEP) y' = $x */
|
||||
rol \$$s, $dst /* dst <<< s */
|
||||
EOF
|
||||
$code .= <<EOF;
|
||||
add $x, $dst /* dst += x */
|
||||
EOF
|
||||
$round3_alter^=1;
|
||||
}
|
||||
}
|
||||
|
||||
# round4_step() does:
|
||||
|
Loading…
x
Reference in New Issue
Block a user