aesp8-ppc.pl: add CTR mode.
This commit is contained in:
parent
7241a4c7fd
commit
764fe518da
@ -30,6 +30,7 @@ if ($flavour =~ /64/) {
|
||||
$POP ="ld";
|
||||
$PUSH ="std";
|
||||
$UCMP ="cmpld";
|
||||
$SHL ="sldi";
|
||||
} elsif ($flavour =~ /32/) {
|
||||
$SIZE_T =4;
|
||||
$LRSAVE =$SIZE_T;
|
||||
@ -37,6 +38,7 @@ if ($flavour =~ /64/) {
|
||||
$POP ="lwz";
|
||||
$PUSH ="stw";
|
||||
$UCMP ="cmplw";
|
||||
$SHL ="slwi";
|
||||
} else { die "nonsense $flavour"; }
|
||||
|
||||
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
|
||||
@ -1211,6 +1213,658 @@ Lcbc_dec8x_done:
|
||||
___
|
||||
}} }}}
|
||||
|
||||
#########################################################################
|
||||
{{{ # CTR procedure[s] #
|
||||
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
|
||||
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
|
||||
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
|
||||
map("v$_",(4..11));
|
||||
my $dat=$tmp;
|
||||
|
||||
$code.=<<___;
|
||||
.globl .${prefix}_ctr32_encrypt_blocks
|
||||
.align 5
|
||||
.${prefix}_ctr32_encrypt_blocks:
|
||||
${UCMP}i $len,1
|
||||
bltlr-
|
||||
|
||||
lis r0,0xfff0
|
||||
mfspr $vrsave,256
|
||||
mtspr 256,r0
|
||||
|
||||
li $idx,15
|
||||
vxor $rndkey0,$rndkey0,$rndkey0
|
||||
le?vspltisb $tmp,0x0f
|
||||
|
||||
lvx $ivec,0,$ivp # load [unaligned] iv
|
||||
lvsl $inpperm,0,$ivp
|
||||
lvx $inptail,$idx,$ivp
|
||||
vspltisb $one,1
|
||||
le?vxor $inpperm,$inpperm,$tmp
|
||||
vperm $ivec,$ivec,$inptail,$inpperm
|
||||
vsldoi $one,$rndkey0,$one,1
|
||||
|
||||
neg r11,$inp
|
||||
?lvsl $keyperm,0,$key # prepare for unaligned key
|
||||
lwz $rounds,240($key)
|
||||
|
||||
lvsr $inpperm,0,r11 # prepare for unaligned load
|
||||
lvx $inptail,0,$inp
|
||||
addi $inp,$inp,15 # 15 is not typo
|
||||
le?vxor $inpperm,$inpperm,$tmp
|
||||
|
||||
srwi $rounds,$rounds,1
|
||||
li $idx,16
|
||||
subi $rounds,$rounds,1
|
||||
|
||||
${UCMP}i $len,8
|
||||
bge _aesp8_ctr32_encrypt8x
|
||||
|
||||
?lvsr $outperm,0,$out # prepare for unaligned store
|
||||
vspltisb $outmask,-1
|
||||
lvx $outhead,0,$out
|
||||
?vperm $outmask,$rndkey0,$outmask,$outperm
|
||||
le?vxor $outperm,$outperm,$tmp
|
||||
|
||||
lvx $rndkey0,0,$key
|
||||
mtctr $rounds
|
||||
lvx $rndkey1,$idx,$key
|
||||
addi $idx,$idx,16
|
||||
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
||||
vxor $inout,$ivec,$rndkey0
|
||||
lvx $rndkey0,$idx,$key
|
||||
addi $idx,$idx,16
|
||||
b Loop_ctr32_enc
|
||||
|
||||
.align 5
|
||||
Loop_ctr32_enc:
|
||||
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
||||
vcipher $inout,$inout,$rndkey1
|
||||
lvx $rndkey1,$idx,$key
|
||||
addi $idx,$idx,16
|
||||
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
||||
vcipher $inout,$inout,$rndkey0
|
||||
lvx $rndkey0,$idx,$key
|
||||
addi $idx,$idx,16
|
||||
bdnz Loop_ctr32_enc
|
||||
|
||||
vadduwm $ivec,$ivec,$one
|
||||
vmr $dat,$inptail
|
||||
lvx $inptail,0,$inp
|
||||
addi $inp,$inp,16
|
||||
subic. $len,$len,1 # blocks--
|
||||
|
||||
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
||||
vcipher $inout,$inout,$rndkey1
|
||||
lvx $rndkey1,$idx,$key
|
||||
vperm $dat,$dat,$inptail,$inpperm
|
||||
li $idx,16
|
||||
?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
|
||||
lvx $rndkey0,0,$key
|
||||
vxor $dat,$dat,$rndkey1 # last round key
|
||||
vcipherlast $inout,$inout,$dat
|
||||
|
||||
lvx $rndkey1,$idx,$key
|
||||
addi $idx,$idx,16
|
||||
vperm $inout,$inout,$inout,$outperm
|
||||
vsel $dat,$outhead,$inout,$outmask
|
||||
mtctr $rounds
|
||||
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
||||
vmr $outhead,$inout
|
||||
vxor $inout,$ivec,$rndkey0
|
||||
lvx $rndkey0,$idx,$key
|
||||
addi $idx,$idx,16
|
||||
stvx $dat,0,$out
|
||||
addi $out,$out,16
|
||||
bne Loop_ctr32_enc
|
||||
|
||||
addi $out,$out,-1
|
||||
lvx $inout,0,$out # redundant in aligned case
|
||||
vsel $inout,$outhead,$inout,$outmask
|
||||
stvx $inout,0,$out
|
||||
|
||||
mtspr 256,$vrsave
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,6,0
|
||||
.long 0
|
||||
___
|
||||
#########################################################################
|
||||
{{ # Optimized CTR procedure #
|
||||
my $key_="r11";
|
||||
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
|
||||
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
|
||||
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
|
||||
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
|
||||
# v26-v31 last 6 round keys
|
||||
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
|
||||
my ($two,$three,$four)=($outhead,$outperm,$outmask);
|
||||
|
||||
$code.=<<___;
|
||||
.align 5
|
||||
_aesp8_ctr32_encrypt8x:
|
||||
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
|
||||
li r10,`$FRAME+8*16+15`
|
||||
li r11,`$FRAME+8*16+31`
|
||||
stvx v20,r10,$sp # ABI says so
|
||||
addi r10,r10,32
|
||||
stvx v21,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx v22,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx v23,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx v24,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx v25,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx v26,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx v27,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx v28,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx v29,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx v30,r10,$sp
|
||||
stvx v31,r11,$sp
|
||||
li r0,-1
|
||||
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
|
||||
li $x10,0x10
|
||||
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
||||
li $x20,0x20
|
||||
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
||||
li $x30,0x30
|
||||
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
||||
li $x40,0x40
|
||||
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
||||
li $x50,0x50
|
||||
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
||||
li $x60,0x60
|
||||
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
||||
li $x70,0x70
|
||||
mtspr 256,r0
|
||||
|
||||
subi $rounds,$rounds,3 # -4 in total
|
||||
|
||||
lvx $rndkey0,$x00,$key # load key schedule
|
||||
lvx v30,$x10,$key
|
||||
addi $key,$key,0x20
|
||||
lvx v31,$x00,$key
|
||||
?vperm $rndkey0,$rndkey0,v30,$keyperm
|
||||
addi $key_,$sp,$FRAME+15
|
||||
mtctr $rounds
|
||||
|
||||
Load_ctr32_enc_key:
|
||||
?vperm v24,v30,v31,$keyperm
|
||||
lvx v30,$x10,$key
|
||||
addi $key,$key,0x20
|
||||
stvx v24,$x00,$key_ # off-load round[1]
|
||||
?vperm v25,v31,v30,$keyperm
|
||||
lvx v31,$x00,$key
|
||||
stvx v25,$x10,$key_ # off-load round[2]
|
||||
addi $key_,$key_,0x20
|
||||
bdnz Load_ctr32_enc_key
|
||||
|
||||
lvx v26,$x10,$key
|
||||
?vperm v24,v30,v31,$keyperm
|
||||
lvx v27,$x20,$key
|
||||
stvx v24,$x00,$key_ # off-load round[3]
|
||||
?vperm v25,v31,v26,$keyperm
|
||||
lvx v28,$x30,$key
|
||||
stvx v25,$x10,$key_ # off-load round[4]
|
||||
addi $key_,$sp,$FRAME+15 # rewind $key_
|
||||
?vperm v26,v26,v27,$keyperm
|
||||
lvx v29,$x40,$key
|
||||
?vperm v27,v27,v28,$keyperm
|
||||
lvx v30,$x50,$key
|
||||
?vperm v28,v28,v29,$keyperm
|
||||
lvx v31,$x60,$key
|
||||
?vperm v29,v29,v30,$keyperm
|
||||
lvx $out0,$x70,$key # borrow $out0
|
||||
?vperm v30,v30,v31,$keyperm
|
||||
lvx v24,$x00,$key_ # pre-load round[1]
|
||||
?vperm v31,v31,$out0,$keyperm
|
||||
lvx v25,$x10,$key_ # pre-load round[2]
|
||||
|
||||
vadduwm $two,$one,$one
|
||||
subi $inp,$inp,15 # undo "caller"
|
||||
$SHL $len,$len,4
|
||||
|
||||
vadduwm $out1,$ivec,$one # counter values ...
|
||||
vadduwm $out2,$ivec,$two
|
||||
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
|
||||
le?li $idx,8
|
||||
vadduwm $out3,$out1,$two
|
||||
vxor $out1,$out1,$rndkey0
|
||||
le?lvsl $inpperm,0,$idx
|
||||
vadduwm $out4,$out2,$two
|
||||
vxor $out2,$out2,$rndkey0
|
||||
le?vspltisb $tmp,0x0f
|
||||
vadduwm $out5,$out3,$two
|
||||
vxor $out3,$out3,$rndkey0
|
||||
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
|
||||
vadduwm $out6,$out4,$two
|
||||
vxor $out4,$out4,$rndkey0
|
||||
vadduwm $out7,$out5,$two
|
||||
vxor $out5,$out5,$rndkey0
|
||||
vadduwm $ivec,$out6,$two # next counter value
|
||||
vxor $out6,$out6,$rndkey0
|
||||
vxor $out7,$out7,$rndkey0
|
||||
|
||||
mtctr $rounds
|
||||
b Loop_ctr32_enc8x
|
||||
.align 5
|
||||
Loop_ctr32_enc8x:
|
||||
vcipher $out0,$out0,v24
|
||||
vcipher $out1,$out1,v24
|
||||
vcipher $out2,$out2,v24
|
||||
vcipher $out3,$out3,v24
|
||||
vcipher $out4,$out4,v24
|
||||
vcipher $out5,$out5,v24
|
||||
vcipher $out6,$out6,v24
|
||||
vcipher $out7,$out7,v24
|
||||
Loop_ctr32_enc8x_middle:
|
||||
lvx v24,$x20,$key_ # round[3]
|
||||
addi $key_,$key_,0x20
|
||||
|
||||
vcipher $out0,$out0,v25
|
||||
vcipher $out1,$out1,v25
|
||||
vcipher $out2,$out2,v25
|
||||
vcipher $out3,$out3,v25
|
||||
vcipher $out4,$out4,v25
|
||||
vcipher $out5,$out5,v25
|
||||
vcipher $out6,$out6,v25
|
||||
vcipher $out7,$out7,v25
|
||||
lvx v25,$x10,$key_ # round[4]
|
||||
bdnz Loop_ctr32_enc8x
|
||||
|
||||
subic r11,$len,256 # $len-256, borrow $key_
|
||||
vcipher $out0,$out0,v24
|
||||
vcipher $out1,$out1,v24
|
||||
vcipher $out2,$out2,v24
|
||||
vcipher $out3,$out3,v24
|
||||
vcipher $out4,$out4,v24
|
||||
vcipher $out5,$out5,v24
|
||||
vcipher $out6,$out6,v24
|
||||
vcipher $out7,$out7,v24
|
||||
|
||||
subfe r0,r0,r0 # borrow?-1:0
|
||||
vcipher $out0,$out0,v25
|
||||
vcipher $out1,$out1,v25
|
||||
vcipher $out2,$out2,v25
|
||||
vcipher $out3,$out3,v25
|
||||
vcipher $out4,$out4,v25
|
||||
vcipher $out5,$out5,v25
|
||||
vcipher $out6,$out6,v25
|
||||
vcipher $out7,$out7,v25
|
||||
|
||||
and r0,r0,r11
|
||||
addi $key_,$sp,$FRAME+15 # rewind $key_
|
||||
vcipher $out0,$out0,v26
|
||||
vcipher $out1,$out1,v26
|
||||
vcipher $out2,$out2,v26
|
||||
vcipher $out3,$out3,v26
|
||||
vcipher $out4,$out4,v26
|
||||
vcipher $out5,$out5,v26
|
||||
vcipher $out6,$out6,v26
|
||||
vcipher $out7,$out7,v26
|
||||
lvx v24,$x00,$key_ # re-pre-load round[1]
|
||||
|
||||
subic $len,$len,129 # $len-=129
|
||||
vcipher $out0,$out0,v27
|
||||
addi $len,$len,1 # $len-=128 really
|
||||
vcipher $out1,$out1,v27
|
||||
vcipher $out2,$out2,v27
|
||||
vcipher $out3,$out3,v27
|
||||
vcipher $out4,$out4,v27
|
||||
vcipher $out5,$out5,v27
|
||||
vcipher $out6,$out6,v27
|
||||
vcipher $out7,$out7,v27
|
||||
lvx v25,$x10,$key_ # re-pre-load round[2]
|
||||
|
||||
vcipher $out0,$out0,v28
|
||||
lvx_u $in0,$x00,$inp # load input
|
||||
vcipher $out1,$out1,v28
|
||||
lvx_u $in1,$x10,$inp
|
||||
vcipher $out2,$out2,v28
|
||||
lvx_u $in2,$x20,$inp
|
||||
vcipher $out3,$out3,v28
|
||||
lvx_u $in3,$x30,$inp
|
||||
vcipher $out4,$out4,v28
|
||||
lvx_u $in4,$x40,$inp
|
||||
vcipher $out5,$out5,v28
|
||||
lvx_u $in5,$x50,$inp
|
||||
vcipher $out6,$out6,v28
|
||||
lvx_u $in6,$x60,$inp
|
||||
vcipher $out7,$out7,v28
|
||||
lvx_u $in7,$x70,$inp
|
||||
addi $inp,$inp,0x80
|
||||
|
||||
vcipher $out0,$out0,v29
|
||||
le?vperm $in0,$in0,$in0,$inpperm
|
||||
vcipher $out1,$out1,v29
|
||||
le?vperm $in1,$in1,$in1,$inpperm
|
||||
vcipher $out2,$out2,v29
|
||||
le?vperm $in2,$in2,$in2,$inpperm
|
||||
vcipher $out3,$out3,v29
|
||||
le?vperm $in3,$in3,$in3,$inpperm
|
||||
vcipher $out4,$out4,v29
|
||||
le?vperm $in4,$in4,$in4,$inpperm
|
||||
vcipher $out5,$out5,v29
|
||||
le?vperm $in5,$in5,$in5,$inpperm
|
||||
vcipher $out6,$out6,v29
|
||||
le?vperm $in6,$in6,$in6,$inpperm
|
||||
vcipher $out7,$out7,v29
|
||||
le?vperm $in7,$in7,$in7,$inpperm
|
||||
|
||||
add $inp,$inp,r0 # $inp is adjusted in such
|
||||
# way that at exit from the
|
||||
# loop inX-in7 are loaded
|
||||
# with last "words"
|
||||
subfe. r0,r0,r0 # borrow?-1:0
|
||||
vcipher $out0,$out0,v30
|
||||
vxor $in0,$in0,v31 # xor with last round key
|
||||
vcipher $out1,$out1,v30
|
||||
vxor $in1,$in1,v31
|
||||
vcipher $out2,$out2,v30
|
||||
vxor $in2,$in2,v31
|
||||
vcipher $out3,$out3,v30
|
||||
vxor $in3,$in3,v31
|
||||
vcipher $out4,$out4,v30
|
||||
vxor $in4,$in4,v31
|
||||
vcipher $out5,$out5,v30
|
||||
vxor $in5,$in5,v31
|
||||
vcipher $out6,$out6,v30
|
||||
vxor $in6,$in6,v31
|
||||
vcipher $out7,$out7,v30
|
||||
vxor $in7,$in7,v31
|
||||
|
||||
bne Lctr32_enc8x_break # did $len-129 borrow?
|
||||
|
||||
vcipherlast $in0,$out0,$in0
|
||||
vcipherlast $in1,$out1,$in1
|
||||
vadduwm $out1,$ivec,$one # counter values ...
|
||||
vcipherlast $in2,$out2,$in2
|
||||
vadduwm $out2,$ivec,$two
|
||||
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
|
||||
vcipherlast $in3,$out3,$in3
|
||||
vadduwm $out3,$out1,$two
|
||||
vxor $out1,$out1,$rndkey0
|
||||
vcipherlast $in4,$out4,$in4
|
||||
vadduwm $out4,$out2,$two
|
||||
vxor $out2,$out2,$rndkey0
|
||||
vcipherlast $in5,$out5,$in5
|
||||
vadduwm $out5,$out3,$two
|
||||
vxor $out3,$out3,$rndkey0
|
||||
vcipherlast $in6,$out6,$in6
|
||||
vadduwm $out6,$out4,$two
|
||||
vxor $out4,$out4,$rndkey0
|
||||
vcipherlast $in7,$out7,$in7
|
||||
vadduwm $out7,$out5,$two
|
||||
vxor $out5,$out5,$rndkey0
|
||||
le?vperm $in0,$in0,$in0,$inpperm
|
||||
vadduwm $ivec,$out6,$two # next counter value
|
||||
vxor $out6,$out6,$rndkey0
|
||||
le?vperm $in1,$in1,$in1,$inpperm
|
||||
vxor $out7,$out7,$rndkey0
|
||||
mtctr $rounds
|
||||
|
||||
vcipher $out0,$out0,v24
|
||||
stvx_u $in0,$x00,$out
|
||||
le?vperm $in2,$in2,$in2,$inpperm
|
||||
vcipher $out1,$out1,v24
|
||||
stvx_u $in1,$x10,$out
|
||||
le?vperm $in3,$in3,$in3,$inpperm
|
||||
vcipher $out2,$out2,v24
|
||||
stvx_u $in2,$x20,$out
|
||||
le?vperm $in4,$in4,$in4,$inpperm
|
||||
vcipher $out3,$out3,v24
|
||||
stvx_u $in3,$x30,$out
|
||||
le?vperm $in5,$in5,$in5,$inpperm
|
||||
vcipher $out4,$out4,v24
|
||||
stvx_u $in4,$x40,$out
|
||||
le?vperm $in6,$in6,$in6,$inpperm
|
||||
vcipher $out5,$out5,v24
|
||||
stvx_u $in5,$x50,$out
|
||||
le?vperm $in7,$in7,$in7,$inpperm
|
||||
vcipher $out6,$out6,v24
|
||||
stvx_u $in6,$x60,$out
|
||||
vcipher $out7,$out7,v24
|
||||
stvx_u $in7,$x70,$out
|
||||
addi $out,$out,0x80
|
||||
|
||||
b Loop_ctr32_enc8x_middle
|
||||
|
||||
.align 5
|
||||
Lctr32_enc8x_break:
|
||||
cmpwi $len,-0x60
|
||||
blt Lctr32_enc8x_one
|
||||
nop
|
||||
beq Lctr32_enc8x_two
|
||||
cmpwi $len,-0x40
|
||||
blt Lctr32_enc8x_three
|
||||
nop
|
||||
beq Lctr32_enc8x_four
|
||||
cmpwi $len,-0x20
|
||||
blt Lctr32_enc8x_five
|
||||
nop
|
||||
beq Lctr32_enc8x_six
|
||||
cmpwi $len,0x00
|
||||
blt Lctr32_enc8x_seven
|
||||
|
||||
Lctr32_enc8x_eight:
|
||||
vcipherlast $out0,$out0,$in0
|
||||
vcipherlast $out1,$out1,$in1
|
||||
vcipherlast $out2,$out2,$in2
|
||||
vcipherlast $out3,$out3,$in3
|
||||
vcipherlast $out4,$out4,$in4
|
||||
vcipherlast $out5,$out5,$in5
|
||||
vcipherlast $out6,$out6,$in6
|
||||
vcipherlast $out7,$out7,$in7
|
||||
|
||||
le?vperm $out0,$out0,$out0,$inpperm
|
||||
le?vperm $out1,$out1,$out1,$inpperm
|
||||
stvx_u $out0,$x00,$out
|
||||
le?vperm $out2,$out2,$out2,$inpperm
|
||||
stvx_u $out1,$x10,$out
|
||||
le?vperm $out3,$out3,$out3,$inpperm
|
||||
stvx_u $out2,$x20,$out
|
||||
le?vperm $out4,$out4,$out4,$inpperm
|
||||
stvx_u $out3,$x30,$out
|
||||
le?vperm $out5,$out5,$out5,$inpperm
|
||||
stvx_u $out4,$x40,$out
|
||||
le?vperm $out6,$out6,$out6,$inpperm
|
||||
stvx_u $out5,$x50,$out
|
||||
le?vperm $out7,$out7,$out7,$inpperm
|
||||
stvx_u $out6,$x60,$out
|
||||
stvx_u $out7,$x70,$out
|
||||
addi $out,$out,0x80
|
||||
b Lctr32_enc8x_done
|
||||
|
||||
.align 5
|
||||
Lctr32_enc8x_seven:
|
||||
vcipherlast $out0,$out0,$in1
|
||||
vcipherlast $out1,$out1,$in2
|
||||
vcipherlast $out2,$out2,$in3
|
||||
vcipherlast $out3,$out3,$in4
|
||||
vcipherlast $out4,$out4,$in5
|
||||
vcipherlast $out5,$out5,$in6
|
||||
vcipherlast $out6,$out6,$in7
|
||||
|
||||
le?vperm $out0,$out0,$out0,$inpperm
|
||||
le?vperm $out1,$out1,$out1,$inpperm
|
||||
stvx_u $out0,$x00,$out
|
||||
le?vperm $out2,$out2,$out2,$inpperm
|
||||
stvx_u $out1,$x10,$out
|
||||
le?vperm $out3,$out3,$out3,$inpperm
|
||||
stvx_u $out2,$x20,$out
|
||||
le?vperm $out4,$out4,$out4,$inpperm
|
||||
stvx_u $out3,$x30,$out
|
||||
le?vperm $out5,$out5,$out5,$inpperm
|
||||
stvx_u $out4,$x40,$out
|
||||
le?vperm $out6,$out6,$out6,$inpperm
|
||||
stvx_u $out5,$x50,$out
|
||||
stvx_u $out6,$x60,$out
|
||||
addi $out,$out,0x70
|
||||
b Lctr32_enc8x_done
|
||||
|
||||
.align 5
|
||||
Lctr32_enc8x_six:
|
||||
vcipherlast $out0,$out0,$in2
|
||||
vcipherlast $out1,$out1,$in3
|
||||
vcipherlast $out2,$out2,$in4
|
||||
vcipherlast $out3,$out3,$in5
|
||||
vcipherlast $out4,$out4,$in6
|
||||
vcipherlast $out5,$out5,$in7
|
||||
|
||||
le?vperm $out0,$out0,$out0,$inpperm
|
||||
le?vperm $out1,$out1,$out1,$inpperm
|
||||
stvx_u $out0,$x00,$out
|
||||
le?vperm $out2,$out2,$out2,$inpperm
|
||||
stvx_u $out1,$x10,$out
|
||||
le?vperm $out3,$out3,$out3,$inpperm
|
||||
stvx_u $out2,$x20,$out
|
||||
le?vperm $out4,$out4,$out4,$inpperm
|
||||
stvx_u $out3,$x30,$out
|
||||
le?vperm $out5,$out5,$out5,$inpperm
|
||||
stvx_u $out4,$x40,$out
|
||||
stvx_u $out5,$x50,$out
|
||||
addi $out,$out,0x60
|
||||
b Lctr32_enc8x_done
|
||||
|
||||
.align 5
|
||||
Lctr32_enc8x_five:
|
||||
vcipherlast $out0,$out0,$in3
|
||||
vcipherlast $out1,$out1,$in4
|
||||
vcipherlast $out2,$out2,$in5
|
||||
vcipherlast $out3,$out3,$in6
|
||||
vcipherlast $out4,$out4,$in7
|
||||
|
||||
le?vperm $out0,$out0,$out0,$inpperm
|
||||
le?vperm $out1,$out1,$out1,$inpperm
|
||||
stvx_u $out0,$x00,$out
|
||||
le?vperm $out2,$out2,$out2,$inpperm
|
||||
stvx_u $out1,$x10,$out
|
||||
le?vperm $out3,$out3,$out3,$inpperm
|
||||
stvx_u $out2,$x20,$out
|
||||
le?vperm $out4,$out4,$out4,$inpperm
|
||||
stvx_u $out3,$x30,$out
|
||||
stvx_u $out4,$x40,$out
|
||||
addi $out,$out,0x50
|
||||
b Lctr32_enc8x_done
|
||||
|
||||
.align 5
|
||||
Lctr32_enc8x_four:
|
||||
vcipherlast $out0,$out0,$in4
|
||||
vcipherlast $out1,$out1,$in5
|
||||
vcipherlast $out2,$out2,$in6
|
||||
vcipherlast $out3,$out3,$in7
|
||||
|
||||
le?vperm $out0,$out0,$out0,$inpperm
|
||||
le?vperm $out1,$out1,$out1,$inpperm
|
||||
stvx_u $out0,$x00,$out
|
||||
le?vperm $out2,$out2,$out2,$inpperm
|
||||
stvx_u $out1,$x10,$out
|
||||
le?vperm $out3,$out3,$out3,$inpperm
|
||||
stvx_u $out2,$x20,$out
|
||||
stvx_u $out3,$x30,$out
|
||||
addi $out,$out,0x40
|
||||
b Lctr32_enc8x_done
|
||||
|
||||
.align	5
Lctr32_enc8x_three:
	# Tail case: 3 remaining blocks. Counter streams were pre-loaded into
	# $out0..$out7 oldest-last, so the last 3 inputs live in $in5..$in7.
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm	# byte-swap for LE stvx_u
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lctr32_enc8x_done	# was Lcbc_dec8x_done: must run the
						# CTR epilogue (wipe off-loaded round
						# keys, restore vrsave/v20-v31/GPRs
						# for THIS frame), not CBC-decrypt's
|
||||
|
||||
.align	5
Lctr32_enc8x_two:
	# Tail case: 2 remaining blocks; last 2 inputs are in $in6/$in7.
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm	# byte-swap for LE stvx_u
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lctr32_enc8x_done	# was Lcbc_dec8x_done: branch to the
						# CTR epilogue, matching the other
						# Lctr32_enc8x_* size cases; CBC's
						# epilogue tears down a different frame
|
||||
|
||||
.align 5
|
||||
Lctr32_enc8x_one:
|
||||
vcipherlast $out0,$out0,$in7
|
||||
|
||||
le?vperm $out0,$out0,$out0,$inpperm
|
||||
stvx_u $out0,0,$out
|
||||
addi $out,$out,0x10
|
||||
|
||||
Lctr32_enc8x_done:
|
||||
li r10,`$FRAME+15`
|
||||
li r11,`$FRAME+31`
|
||||
stvx $inpperm,r10,$sp # wipe copies of round keys
|
||||
addi r10,r10,32
|
||||
stvx $inpperm,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx $inpperm,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx $inpperm,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx $inpperm,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx $inpperm,r11,$sp
|
||||
addi r11,r11,32
|
||||
stvx $inpperm,r10,$sp
|
||||
addi r10,r10,32
|
||||
stvx $inpperm,r11,$sp
|
||||
addi r11,r11,32
|
||||
|
||||
mtspr 256,$vrsave
|
||||
lvx v20,r10,$sp # ABI says so
|
||||
addi r10,r10,32
|
||||
lvx v21,r11,$sp
|
||||
addi r11,r11,32
|
||||
lvx v22,r10,$sp
|
||||
addi r10,r10,32
|
||||
lvx v23,r11,$sp
|
||||
addi r11,r11,32
|
||||
lvx v24,r10,$sp
|
||||
addi r10,r10,32
|
||||
lvx v25,r11,$sp
|
||||
addi r11,r11,32
|
||||
lvx v26,r10,$sp
|
||||
addi r10,r10,32
|
||||
lvx v27,r11,$sp
|
||||
addi r11,r11,32
|
||||
lvx v28,r10,$sp
|
||||
addi r10,r10,32
|
||||
lvx v29,r11,$sp
|
||||
addi r11,r11,32
|
||||
lvx v30,r10,$sp
|
||||
lvx v31,r11,$sp
|
||||
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
||||
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
||||
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
||||
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
||||
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
||||
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
||||
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0x80,6,6,0
|
||||
.long 0
|
||||
.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
|
||||
___
|
||||
}} }}}
|
||||
|
||||
my $consts=1;
|
||||
foreach(split("\n",$code)) {
|
||||
s/\`([^\`]*)\`/eval($1)/geo;
|
||||
|
@ -165,6 +165,7 @@ extern unsigned int OPENSSL_ppccap_P;
|
||||
# define HWAES_encrypt aes_p8_encrypt
|
||||
# define HWAES_decrypt aes_p8_decrypt
|
||||
# define HWAES_cbc_encrypt aes_p8_cbc_encrypt
|
||||
# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
|
||||
#endif
|
||||
|
||||
#if defined(AES_ASM) && !defined(I386_ONLY) && ( \
|
||||
|
Loading…
x
Reference in New Issue
Block a user