s390x assembler pack: tune-up and support for new z196 hardware.
This commit is contained in:
@@ -28,6 +28,15 @@
|
||||
# remains z/Architecture specific. On z990 it was measured to perform
|
||||
# 2.8x better than 32-bit code generated by gcc 4.3.
|
||||
|
||||
# March 2011.
|
||||
#
|
||||
# Support for hardware KIMD-GHASH is verified to produce the correct
|
||||
# result and therefore is engaged. On z196 it was measured to process
|
||||
# 8KB buffer ~7x faster than software implementation. It's not as
|
||||
# impressive for smaller buffer sizes and for the smallest 16-byte buffer
|
||||
# it's actually almost 2 times slower, which is the reason why
|
||||
# KIMD-GHASH is not used in gcm_gmult_4bit.
|
||||
|
||||
$flavour = shift;
|
||||
|
||||
if ($flavour =~ /3[12]/) {
|
||||
@@ -41,7 +50,7 @@ if ($flavour =~ /3[12]/) {
|
||||
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
|
||||
open STDOUT,">$output";
|
||||
|
||||
$softonly=1; # disable hardware support for now
|
||||
$softonly=0;
|
||||
|
||||
$Zhi="%r0";
|
||||
$Zlo="%r1";
|
||||
@@ -70,7 +79,7 @@ $code.=<<___;
|
||||
.align 32
|
||||
gcm_gmult_4bit:
|
||||
___
|
||||
$code.=<<___ if(!$softonly);
|
||||
$code.=<<___ if(!$softonly && 0); # hardware is slow for single block...
|
||||
larl %r1,OPENSSL_s390xcap_P
|
||||
lg %r0,0(%r1)
|
||||
tmhl %r0,0x4000 # check for message-security-assist
|
||||
|
||||
Reference in New Issue
Block a user