aes-ppc.pl, sha512-ppc.pl: comply even with the Embedded ABI specification

(which is the most restrictive about r2 and r13 usage).
This commit is contained in:
Andy Polyakov 2012-01-13 09:16:52 +00:00
parent a50bce82ec
commit 23b93b587b
2 changed files with 26 additions and 29 deletions

View File

@ -68,7 +68,7 @@ $key="r5";
$Tbl0="r3"; $Tbl0="r3";
$Tbl1="r6"; $Tbl1="r6";
$Tbl2="r7"; $Tbl2="r7";
$Tbl3="r2"; $Tbl3=$out; # stay away from "r2"; $out is offloaded to stack
$s0="r8"; $s0="r8";
$s1="r9"; $s1="r9";
@ -76,7 +76,7 @@ $s2="r10";
$s3="r11"; $s3="r11";
$t0="r12"; $t0="r12";
$t1="r13"; $t1="r0"; # stay away from "r13";
$t2="r14"; $t2="r14";
$t3="r15"; $t3="r15";
@ -100,9 +100,6 @@ $acc13="r29";
$acc14="r30"; $acc14="r30";
$acc15="r31"; $acc15="r31";
# stay away from TLS pointer
if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; }
else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
$mask80=$Tbl2; $mask80=$Tbl2;
$mask1b=$Tbl3; $mask1b=$Tbl3;
@ -337,8 +334,7 @@ $code.=<<___;
$STU $sp,-$FRAME($sp) $STU $sp,-$FRAME($sp)
mflr r0 mflr r0
$PUSH $toc,`$FRAME-$SIZE_T*20`($sp) $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
$PUSH r13,`$FRAME-$SIZE_T*19`($sp)
$PUSH r14,`$FRAME-$SIZE_T*18`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
$PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
$PUSH r16,`$FRAME-$SIZE_T*16`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
@ -371,6 +367,7 @@ Lenc_unaligned_ok:
lwz $s3,12($inp) lwz $s3,12($inp)
bl LAES_Te bl LAES_Te
bl Lppc_AES_encrypt_compact bl Lppc_AES_encrypt_compact
$POP $out,`$FRAME-$SIZE_T*19`($sp)
stw $s0,0($out) stw $s0,0($out)
stw $s1,4($out) stw $s1,4($out)
stw $s2,8($out) stw $s2,8($out)
@ -417,6 +414,7 @@ Lenc_xpage:
bl LAES_Te bl LAES_Te
bl Lppc_AES_encrypt_compact bl Lppc_AES_encrypt_compact
$POP $out,`$FRAME-$SIZE_T*19`($sp)
extrwi $acc00,$s0,8,0 extrwi $acc00,$s0,8,0
extrwi $acc01,$s0,8,8 extrwi $acc01,$s0,8,8
@ -449,8 +447,6 @@ Lenc_xpage:
Lenc_done: Lenc_done:
$POP r0,`$FRAME+$LRSAVE`($sp) $POP r0,`$FRAME+$LRSAVE`($sp)
$POP $toc,`$FRAME-$SIZE_T*20`($sp)
$POP r13,`$FRAME-$SIZE_T*19`($sp)
$POP r14,`$FRAME-$SIZE_T*18`($sp) $POP r14,`$FRAME-$SIZE_T*18`($sp)
$POP r15,`$FRAME-$SIZE_T*17`($sp) $POP r15,`$FRAME-$SIZE_T*17`($sp)
$POP r16,`$FRAME-$SIZE_T*16`($sp) $POP r16,`$FRAME-$SIZE_T*16`($sp)
@ -771,8 +767,7 @@ Lenc_compact_done:
$STU $sp,-$FRAME($sp) $STU $sp,-$FRAME($sp)
mflr r0 mflr r0
$PUSH $toc,`$FRAME-$SIZE_T*20`($sp) $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
$PUSH r13,`$FRAME-$SIZE_T*19`($sp)
$PUSH r14,`$FRAME-$SIZE_T*18`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
$PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
$PUSH r16,`$FRAME-$SIZE_T*16`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
@ -805,6 +800,7 @@ Ldec_unaligned_ok:
lwz $s3,12($inp) lwz $s3,12($inp)
bl LAES_Td bl LAES_Td
bl Lppc_AES_decrypt_compact bl Lppc_AES_decrypt_compact
$POP $out,`$FRAME-$SIZE_T*19`($sp)
stw $s0,0($out) stw $s0,0($out)
stw $s1,4($out) stw $s1,4($out)
stw $s2,8($out) stw $s2,8($out)
@ -851,6 +847,7 @@ Ldec_xpage:
bl LAES_Td bl LAES_Td
bl Lppc_AES_decrypt_compact bl Lppc_AES_decrypt_compact
$POP $out,`$FRAME-$SIZE_T*19`($sp)
extrwi $acc00,$s0,8,0 extrwi $acc00,$s0,8,0
extrwi $acc01,$s0,8,8 extrwi $acc01,$s0,8,8
@ -883,8 +880,6 @@ Ldec_xpage:
Ldec_done: Ldec_done:
$POP r0,`$FRAME+$LRSAVE`($sp) $POP r0,`$FRAME+$LRSAVE`($sp)
$POP $toc,`$FRAME-$SIZE_T*20`($sp)
$POP r13,`$FRAME-$SIZE_T*19`($sp)
$POP r14,`$FRAME-$SIZE_T*18`($sp) $POP r14,`$FRAME-$SIZE_T*18`($sp)
$POP r15,`$FRAME-$SIZE_T*17`($sp) $POP r15,`$FRAME-$SIZE_T*17`($sp)
$POP r16,`$FRAME-$SIZE_T*16`($sp) $POP r16,`$FRAME-$SIZE_T*16`($sp)

View File

@ -110,7 +110,7 @@ $B ="r9";
$C ="r10"; $C ="r10";
$D ="r11"; $D ="r11";
$E ="r12"; $E ="r12";
$F ="r13"; $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer $F =$t1; $t1 = "r0"; # stay away from "r13";
$G ="r14"; $G ="r14";
$H ="r15"; $H ="r15";
@ -123,19 +123,18 @@ $inp="r31"; # reassigned $inp! aliases with @X[15]
sub ROUND_00_15 { sub ROUND_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
$code.=<<___; $code.=<<___;
$LD $T,`$i*$SZ`($Tbl)
$ROR $a0,$e,$Sigma1[0] $ROR $a0,$e,$Sigma1[0]
$ROR $a1,$e,$Sigma1[1] $ROR $a1,$e,$Sigma1[1]
and $t0,$f,$e and $t0,$f,$e
andc $t1,$g,$e
add $T,$T,$h
xor $a0,$a0,$a1 xor $a0,$a0,$a1
add $h,$h,$t1
andc $t1,$g,$e
$ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]` $ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]`
or $t0,$t0,$t1 ; Ch(e,f,g) or $t0,$t0,$t1 ; Ch(e,f,g)
add $T,$T,@X[$i] add $h,$h,@X[$i%16]
xor $a0,$a0,$a1 ; Sigma1(e) xor $a0,$a0,$a1 ; Sigma1(e)
add $T,$T,$t0 add $h,$h,$t0
add $T,$T,$a0 add $h,$h,$a0
$ROR $a0,$a,$Sigma0[0] $ROR $a0,$a,$Sigma0[0]
$ROR $a1,$a,$Sigma0[1] $ROR $a1,$a,$Sigma0[1]
@ -146,9 +145,14 @@ $code.=<<___;
xor $t0,$t0,$t1 xor $t0,$t0,$t1
and $t1,$b,$c and $t1,$b,$c
xor $a0,$a0,$a1 ; Sigma0(a) xor $a0,$a0,$a1 ; Sigma0(a)
add $d,$d,$T add $d,$d,$h
xor $t0,$t0,$t1 ; Maj(a,b,c) xor $t0,$t0,$t1 ; Maj(a,b,c)
add $h,$T,$a0 ___
$code.=<<___ if ($i<15);
$LD $t1,`($i+1)*$SZ`($Tbl)
___
$code.=<<___;
add $h,$h,$a0
add $h,$h,$t0 add $h,$h,$t0
___ ___
@ -169,10 +173,11 @@ $code.=<<___;
add @X[$i],@X[$i],@X[($i+9)%16] add @X[$i],@X[$i],@X[($i+9)%16]
xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f]) xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f])
xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f]) xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f])
$LD $t1,`$i*$SZ`($Tbl)
add @X[$i],@X[$i],$a0 add @X[$i],@X[$i],$a0
add @X[$i],@X[$i],$t0 add @X[$i],@X[$i],$t0
___ ___
&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h); &ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
} }
$code=<<___; $code=<<___;
@ -188,8 +193,6 @@ $func:
$PUSH $ctx,`$FRAME-$SIZE_T*22`($sp) $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp)
$PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
$PUSH r13,`$FRAME-$SIZE_T*19`($sp)
$PUSH r14,`$FRAME-$SIZE_T*18`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
$PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
$PUSH r16,`$FRAME-$SIZE_T*16`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
@ -283,8 +286,6 @@ Lmemcpy:
Ldone: Ldone:
$POP r0,`$FRAME+$LRSAVE`($sp) $POP r0,`$FRAME+$LRSAVE`($sp)
$POP $toc,`$FRAME-$SIZE_T*20`($sp)
$POP r13,`$FRAME-$SIZE_T*19`($sp)
$POP r14,`$FRAME-$SIZE_T*18`($sp) $POP r14,`$FRAME-$SIZE_T*18`($sp)
$POP r15,`$FRAME-$SIZE_T*17`($sp) $POP r15,`$FRAME-$SIZE_T*17`($sp)
$POP r16,`$FRAME-$SIZE_T*16`($sp) $POP r16,`$FRAME-$SIZE_T*16`($sp)
@ -312,6 +313,7 @@ Ldone:
.align 4 .align 4
Lsha2_block_private: Lsha2_block_private:
$LD $t1,0($Tbl)
___ ___
for($i=0;$i<16;$i++) { for($i=0;$i<16;$i++) {
$code.=<<___ if ($SZ==4); $code.=<<___ if ($SZ==4);
@ -328,8 +330,8 @@ ___
unshift(@V,pop(@V)); unshift(@V,pop(@V));
} }
$code.=<<___; $code.=<<___;
li $T,`$rounds/16-1` li $t0,`$rounds/16-1`
mtctr $T mtctr $t0
.align 4 .align 4
Lrounds: Lrounds:
addi $Tbl,$Tbl,`16*$SZ` addi $Tbl,$Tbl,`16*$SZ`