md5/asm/md5-[586|x86_64].pl: +15% on Atom.

[MD5 is hardly relevant, just cleaning up repository]
This commit is contained in:
Andy Polyakov 2014-05-04 16:56:00 +02:00
parent 496f2b148b
commit b943b7d2c7
2 changed files with 20 additions and 10 deletions

View File

@ -56,14 +56,14 @@ sub R0
&lea($a,&DWP($t,$a,$tmp2,1)); &lea($a,&DWP($t,$a,$tmp2,1));
&xor($tmp1,$d); # F function - part 4 &xor($tmp1,$d); # F function - part 4
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);
&add($a,$tmp1); &add($a,$tmp1);
&mov($tmp1,&Np($c)) if $pos < 1; # next tmp1 for R0
&mov($tmp1,&Np($c)) if $pos == 1; # next tmp1 for R1
&rotl($a,$s); &rotl($a,$s);
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); &mov($tmp1,&Np($c)) if $pos < 1; # next tmp1 for R0
&mov($tmp1,&Np($c)) if $pos == 1; # next tmp1 for R1
&add($a,$b); &add($a,$b);
} }
@ -74,13 +74,12 @@ sub R1
&comment("R1 $ki"); &comment("R1 $ki");
&lea($a,&DWP($t,$a,$tmp2,1));
&xor($tmp1,$b); # G function - part 2 &xor($tmp1,$b); # G function - part 2
&and($tmp1,$d); # G function - part 3 &and($tmp1,$d); # G function - part 3
&lea($a,&DWP($t,$a,$tmp2,1));
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);
&xor($tmp1,$c); # G function - part 4 &xor($tmp1,$c); # G function - part 4
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);
&add($a,$tmp1); &add($a,$tmp1);
&mov($tmp1,&Np($c)) if $pos < 1; # G function - part 1 &mov($tmp1,&Np($c)) if $pos < 1; # G function - part 1
@ -108,10 +107,10 @@ if (($n & 1) == 0)
&lea($a,&DWP($t,$a,$tmp2,1)); &lea($a,&DWP($t,$a,$tmp2,1));
&add($a,$tmp1); &add($a,$tmp1);
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0));
&rotl($a,$s); &rotl($a,$s);
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0));
&mov($tmp1,&Np($c)); &mov($tmp1,&Np($c));
} }
else else
@ -120,11 +119,11 @@ else
# make sure to do 'D' first, not 'B', else we clash with # make sure to do 'D' first, not 'B', else we clash with
# the last add from the previous round. # the last add from the previous round.
&lea($a,&DWP($t,$a,$tmp2,1));
&add($b,$c); # MOVED FORWARD &add($b,$c); # MOVED FORWARD
&xor($tmp1,$d); # H function - part 2 &xor($tmp1,$d); # H function - part 2
&lea($a,&DWP($t,$a,$tmp2,1));
&xor($tmp1,$b); # H function - part 3 &xor($tmp1,$b); # H function - part 3
&mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2);

View File

@ -47,8 +47,8 @@ sub round2_step
$code .= " mov %edx, %r12d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); $code .= " mov %edx, %r12d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1);
$code .= <<EOF; $code .= <<EOF;
not %r11d /* not z */ not %r11d /* not z */
lea $T_i($dst,%r10d),$dst /* Const + dst + ... */
and $x, %r12d /* x & z */ and $x, %r12d /* x & z */
lea $T_i($dst,%r10d),$dst /* Const + dst + ... */
and $y, %r11d /* y & (not z) */ and $y, %r11d /* y & (not z) */
mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */
or %r11d, %r12d /* (y & (not z)) | (x & z) */ or %r11d, %r12d /* (y & (not z)) | (x & z) */
@ -65,6 +65,7 @@ EOF
# %r10d = X[k_next] # %r10d = X[k_next]
# %r11d = y' (copy of y for the next step) # %r11d = y' (copy of y for the next step)
# Each round3_step() takes about 4.2 clocks (8 instructions, 1.9 IPC) # Each round3_step() takes about 4.2 clocks (8 instructions, 1.9 IPC)
{ my $round3_alter=0;
sub round3_step sub round3_step
{ {
my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
@ -75,10 +76,20 @@ sub round3_step
mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */
xor $x, %r11d /* x ^ ... */ xor $x, %r11d /* x ^ ... */
add %r11d, $dst /* dst += ... */ add %r11d, $dst /* dst += ... */
EOF
$code .= <<EOF if ($round3_alter);
rol \$$s, $dst /* dst <<< s */ rol \$$s, $dst /* dst <<< s */
mov $x, %r11d /* (NEXT STEP) y' = $x */ mov $x, %r11d /* (NEXT STEP) y' = $x */
EOF
$code .= <<EOF if (!$round3_alter);
mov $x, %r11d /* (NEXT STEP) y' = $x */
rol \$$s, $dst /* dst <<< s */
EOF
$code .= <<EOF;
add $x, $dst /* dst += x */ add $x, $dst /* dst += x */
EOF EOF
$round3_alter^=1;
}
} }
# round4_step() does: # round4_step() does: