Fix s390x bugs and correct performance coefficients.
This commit is contained in:
parent
c504a5e783
commit
251718e4c1
@@ -23,7 +23,7 @@
|
||||
# for CBC is not utilized, nor multiple blocks are ever processed.
|
||||
# Then software key schedule can be postponed till hardware support
|
||||
# detection... Performance improvement over assembler is reportedly
|
||||
# ~2.5x, but can reach >15x [naturally on larger chunks] if proper
|
||||
# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
|
||||
# support is implemented.
|
||||
|
||||
$t1="%r0";
|
||||
|
@@ -13,7 +13,7 @@
|
||||
#
|
||||
# Performance is >30% better than gcc 3.3 generated code. But the real
|
||||
# twist is that SHA1 hardware support is detected and utilized. In
|
||||
# which case performance can reach further >8x for larger chunks.
|
||||
# which case performance can reach further >4.5x for larger chunks.
|
||||
|
||||
$kimdfunc=1; # magic function code for kimd instruction
|
||||
|
||||
@@ -160,6 +160,7 @@ $code.=<<___ if ($kimdfunc);
|
||||
lgr %r2,$inp
|
||||
sllg %r3,$len,6
|
||||
.long 0xb93e0002 # kimd %r0,%r2
|
||||
brc 1,.-4 # pay attention to "partial completion"
|
||||
br %r14
|
||||
.Lsoftware:
|
||||
___
|
||||
|
@@ -16,7 +16,7 @@
|
||||
# "pathologically" high, in particular in comparison to other SHA
|
||||
# modules). But the real twist is that it detects if hardware support
|
||||
# for SHA256 is available and in such case utilizes it. Then the
|
||||
# performance can reach >12x of assembler one for larger chunks.
|
||||
# performance can reach >6.5x of assembler one for larger chunks.
|
||||
#
|
||||
# sha512_block_data_order is ~70% faster than gcc 3.3 generated code.
|
||||
|
||||
@@ -219,6 +219,7 @@ $code.=<<___ if ($kimdfunc);
|
||||
lgr %r2,$inp
|
||||
sllg %r3,$len,`log(16*$SZ)/log(2)`
|
||||
.long 0xb93e0002 # kimd %r0,%r2
|
||||
brc 1,.-4 # pay attention to "partial completion"
|
||||
br %r14
|
||||
.Lsoftware:
|
||||
___
|
||||
|
Loading…
x
Reference in New Issue
Block a user