sha1-ppc.pl: shave off one cycle from BODY_20_39
and improve performance by 10% on POWER[78].

Reviewed-by: Kurt Roeckx <kurt@openssl.org>
(cherry picked from commit 5c3598307ebbf5a88d1c39fbb2629536e443a5dd)
This commit is contained in:
parent
2be9425514
commit
a2f34441ab
@@ -125,31 +125,31 @@ my ($i,$a,$b,$c,$d,$e,$f)=@_;
|
|||||||
my $j=$i+1;
|
my $j=$i+1;
|
||||||
$code.=<<___ if ($i<79);
|
$code.=<<___ if ($i<79);
|
||||||
add $f,$K,$e
|
add $f,$K,$e
|
||||||
|
xor $t0,$b,$d
|
||||||
rotlwi $e,$a,5
|
rotlwi $e,$a,5
|
||||||
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
|
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
|
||||||
add $f,$f,@X[$i%16]
|
add $f,$f,@X[$i%16]
|
||||||
xor $t0,$b,$c
|
xor $t0,$t0,$c
|
||||||
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
|
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
|
||||||
add $f,$f,$e
|
|
||||||
rotlwi $b,$b,30
|
|
||||||
xor $t0,$t0,$d
|
|
||||||
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
|
|
||||||
add $f,$f,$t0
|
add $f,$f,$t0
|
||||||
|
rotlwi $b,$b,30
|
||||||
|
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
|
||||||
|
add $f,$f,$e
|
||||||
rotlwi @X[$j%16],@X[$j%16],1
|
rotlwi @X[$j%16],@X[$j%16],1
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($i==79);
|
$code.=<<___ if ($i==79);
|
||||||
add $f,$K,$e
|
add $f,$K,$e
|
||||||
|
xor $t0,$b,$d
|
||||||
rotlwi $e,$a,5
|
rotlwi $e,$a,5
|
||||||
lwz r16,0($ctx)
|
lwz r16,0($ctx)
|
||||||
add $f,$f,@X[$i%16]
|
add $f,$f,@X[$i%16]
|
||||||
xor $t0,$b,$c
|
xor $t0,$t0,$c
|
||||||
lwz r17,4($ctx)
|
lwz r17,4($ctx)
|
||||||
add $f,$f,$e
|
add $f,$f,$t0
|
||||||
rotlwi $b,$b,30
|
rotlwi $b,$b,30
|
||||||
lwz r18,8($ctx)
|
lwz r18,8($ctx)
|
||||||
xor $t0,$t0,$d
|
|
||||||
lwz r19,12($ctx)
|
lwz r19,12($ctx)
|
||||||
add $f,$f,$t0
|
add $f,$f,$e
|
||||||
lwz r20,16($ctx)
|
lwz r20,16($ctx)
|
||||||
___
|
___
|
||||||
}
|
}
|
||||||
|
@@ -13,8 +13,8 @@
|
|||||||
# always virtualized setup with possibly throttled processor.
|
# always virtualized setup with possibly throttled processor.
|
||||||
# Relative comparison is therefore more informative. This module is
|
# Relative comparison is therefore more informative. This module is
|
||||||
# ~60% faster than integer-only sha512-ppc.pl. To anchor to something
|
# ~60% faster than integer-only sha512-ppc.pl. To anchor to something
|
||||||
# else, SHA256 is 16% slower than sha1-ppc.pl and 2.5x slower than
|
# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than
|
||||||
# hardware-assisted aes-128-cbc encrypt. SHA512 is 33% faster than
|
# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than
|
||||||
# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
|
# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
|
||||||
# result is degree of computational resources' utilization. POWER8 is
|
# result is degree of computational resources' utilization. POWER8 is
|
||||||
# "massively multi-threaded chip" and difference between single- and
|
# "massively multi-threaded chip" and difference between single- and
|
||||||
|
Loading…
x
Reference in New Issue
Block a user