rc4-s390x.pl: allow for older assemblers (replace llgcr with nill) and optimize the single-byte loop (.Loop1).
This commit is contained in:
parent
13c3a1defa
commit
c558c99fd8
@ -9,9 +9,9 @@
|
|||||||
#
|
#
|
||||||
# February 2009
|
# February 2009
|
||||||
#
|
#
|
||||||
# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to avoid
|
# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
|
||||||
# arithmetic instructions, but adhere to load and load address in
|
# "cluster" Address Generation Interlocks, so that one pipeline stall
|
||||||
# order to minimize Address Generation Interlock.
|
# resolves several dependencies.
|
||||||
|
|
||||||
$rp="%r14";
|
$rp="%r14";
|
||||||
$sp="%r15";
|
$sp="%r15";
|
||||||
@ -43,10 +43,10 @@ RC4:
|
|||||||
llgc $XX[0],0($key)
|
llgc $XX[0],0($key)
|
||||||
llgc $YY,1($key)
|
llgc $YY,1($key)
|
||||||
la $XX[0],1($XX[0])
|
la $XX[0],1($XX[0])
|
||||||
llgcr $XX[0],$XX[0]
|
nill $XX[0],0xff
|
||||||
llgc $TX[0],2($XX[0],$key)
|
|
||||||
srlg $cnt,$len,3
|
srlg $cnt,$len,3
|
||||||
ltgr $cnt,$cnt
|
ltgr $cnt,$cnt
|
||||||
|
llgc $TX[0],2($XX[0],$key)
|
||||||
jz .Lshort
|
jz .Lshort
|
||||||
j .Loop8
|
j .Loop8
|
||||||
|
|
||||||
@ -56,17 +56,17 @@ ___
|
|||||||
for ($i=0;$i<8;$i++) {
|
for ($i=0;$i<8;$i++) {
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
la $YY,0($YY,$TX[0]) # $i
|
la $YY,0($YY,$TX[0]) # $i
|
||||||
llgcr $YY,$YY
|
nill $YY,255
|
||||||
la $XX[1],1($XX[0])
|
la $XX[1],1($XX[0])
|
||||||
llgcr $XX[1],$XX[1]
|
nill $XX[1],255
|
||||||
|
___
|
||||||
|
$code.=<<___ if ($i==1);
|
||||||
|
llgc $acc,2($TY,$key)
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($i>1);
|
$code.=<<___ if ($i>1);
|
||||||
sllg $acc,$acc,8
|
sllg $acc,$acc,8
|
||||||
ic $acc,2($TY,$key)
|
ic $acc,2($TY,$key)
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($i==1);
|
|
||||||
llgc $acc,2($TY,$key)
|
|
||||||
___
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
llgc $TY,2($YY,$key)
|
llgc $TY,2($YY,$key)
|
||||||
stc $TX[0],2($YY,$key)
|
stc $TX[0],2($YY,$key)
|
||||||
@ -77,7 +77,7 @@ $code.=<<___;
|
|||||||
la $TX[1],0($TX[0])
|
la $TX[1],0($TX[0])
|
||||||
.Lcmov$i:
|
.Lcmov$i:
|
||||||
la $TY,0($TY,$TX[0])
|
la $TY,0($TY,$TX[0])
|
||||||
llgcr $TY,$TY
|
nill $TY,255
|
||||||
___
|
___
|
||||||
push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
|
push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
|
||||||
}
|
}
|
||||||
@ -101,18 +101,18 @@ $code.=<<___;
|
|||||||
.align 16
|
.align 16
|
||||||
.Loop1:
|
.Loop1:
|
||||||
la $YY,0($YY,$TX[0])
|
la $YY,0($YY,$TX[0])
|
||||||
llgcr $YY,$YY
|
nill $YY,255
|
||||||
llgc $TY,2($YY,$key)
|
llgc $TY,2($YY,$key)
|
||||||
stc $TX[0],2($YY,$key)
|
stc $TX[0],2($YY,$key)
|
||||||
stc $TY,2($XX[0],$key)
|
stc $TY,2($XX[0],$key)
|
||||||
la $TY,0($TY,$TX[0])
|
ar $TY,$TX[0]
|
||||||
llgcr $TY,$TY
|
ahi $XX[0],1
|
||||||
la $XX[0],1($XX[0])
|
nill $TY,255
|
||||||
llgcr $XX[0],$XX[0]
|
nill $XX[0],255
|
||||||
llgc $TY,2($TY,$key)
|
|
||||||
llgc $TX[0],2($XX[0],$key)
|
|
||||||
llgc $acc,0($inp)
|
llgc $acc,0($inp)
|
||||||
la $inp,1($inp)
|
la $inp,1($inp)
|
||||||
|
llgc $TY,2($TY,$key)
|
||||||
|
llgc $TX[0],2($XX[0],$key)
|
||||||
xr $acc,$TY
|
xr $acc,$TY
|
||||||
stc $acc,0($out)
|
stc $acc,0($out)
|
||||||
la $out,1($out)
|
la $out,1($out)
|
||||||
@ -168,8 +168,8 @@ RC4_set_key:
|
|||||||
la $idx,0($idx,$acc)
|
la $idx,0($idx,$acc)
|
||||||
la $ikey,1($ikey)
|
la $ikey,1($ikey)
|
||||||
la $idx,0($idx,$dat)
|
la $idx,0($idx,$dat)
|
||||||
|
nill $idx,255
|
||||||
la $iinp,1($iinp)
|
la $iinp,1($iinp)
|
||||||
llgcr $idx,$idx
|
|
||||||
tml $ikey,255
|
tml $ikey,255
|
||||||
llgc $dat,2($idx,$key)
|
llgc $dat,2($idx,$key)
|
||||||
stc $dat,2+256-1($ikey,$key)
|
stc $dat,2+256-1($ikey,$key)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user