Pedantic polish to aes-ia64 and sha512-ia64.

This commit is contained in:
Andy Polyakov 2005-07-20 15:15:22 +00:00
parent 165a28abae
commit 0066590f98
2 changed files with 56 additions and 44 deletions

View File

@ -24,7 +24,9 @@
rk0=r8; rk1=r9; rk0=r8; rk1=r9;
prsave=r10; pfssave=r2;
lcsave=r10;
prsave=r3;
maskff=r11; maskff=r11;
twenty4=r14; twenty4=r14;
sixteen=r15; sixteen=r15;
@ -67,6 +69,9 @@ te0=r40; te1=r41; te2=r42; te3=r43;
// Clobber: r16-r31,rk0-rk1,r32-r43 // Clobber: r16-r31,rk0-rk1,r32-r43
.align 32 .align 32
_ia64_AES_encrypt: _ia64_AES_encrypt:
.prologue
.altrp b6
.body
{ .mmi; alloc r16=ar.pfs,12,0,0,8 { .mmi; alloc r16=ar.pfs,12,0,0,8
LDKEY t0=[rk0],2*KSZ LDKEY t0=[rk0],2*KSZ
mov pr.rot=1<<16 } mov pr.rot=1<<16 }
@ -179,20 +184,21 @@ _ia64_AES_encrypt:
.skip 16 .skip 16
AES_encrypt: AES_encrypt:
.prologue .prologue
.save ar.pfs,r2 .save ar.pfs,pfssave
{ .mmi; alloc r2=ar.pfs,3,0,12,0 { .mmi; alloc pfssave=ar.pfs,3,0,12,0
addl out8=@ltoff(AES_Te#),gp and out0=3,in0
.save ar.lc,r3 mov r3=ip }
mov r3=ar.lc } { .mmi; ADDP in0=0,in0
{ .mmi; and out0=3,in0 ADDP out11=KSZ*60,in2 // &AES_KEY->rounds
ADDP in0=0,in0 .save ar.lc,lcsave
ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds mov lcsave=ar.lc };;
.body { .mmi; ld4 out11=[out11] // AES_KEY->rounds
{ .mmi; ld8 out8=[out8] // Te0 add out8=(AES_Te#-AES_encrypt#),r3 // Te0
ld4 out11=[out11] // AES_KEY->rounds .save pr,prsave
mov prsave=pr } mov prsave=pr }
.body
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... #if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
{ .mib; cmp.ne p6,p0=out0,r0 { .mib; cmp.ne p6,p0=out0,r0
add out0=4,in0 add out0=4,in0
@ -215,8 +221,8 @@ AES_encrypt:
ADDP in1=0,in1 ADDP in1=0,in1
(p6) br.spnt .Le_o_unaligned };; (p6) br.spnt .Le_o_unaligned };;
{ .mii; mov ar.pfs=r2 { .mii; mov ar.pfs=pfssave
mov ar.lc=r3 } mov ar.lc=lcsave }
{ .mmi; st4 [in1]=r16,8 // s0 { .mmi; st4 [in1]=r16,8 // s0
st4 [in0]=r20,8 // s1 st4 [in0]=r20,8 // s1
mov pr=prsave,0x1ffff };; mov pr=prsave,0x1ffff };;
@ -299,10 +305,10 @@ AES_encrypt:
mov pr=prsave,0x1ffff }//;; mov pr=prsave,0x1ffff }//;;
{ .mmi; st1 [out1]=r26,4 { .mmi; st1 [out1]=r26,4
st1 [out0]=r27,4 st1 [out0]=r27,4
mov ar.pfs=r2 };; mov ar.pfs=pfssave };;
{ .mmi; st1 [out3]=r28 { .mmi; st1 [out3]=r28
st1 [out2]=r29 st1 [out2]=r29
mov ar.lc=r3 }//;; mov ar.lc=lcsave }//;;
{ .mmb; st1 [out1]=r30 { .mmb; st1 [out1]=r30
st1 [out0]=r31 st1 [out0]=r31
br.ret.sptk.many b0 };; br.ret.sptk.many b0 };;
@ -359,6 +365,9 @@ while(<>) {
// Clobber: r16-r31,rk0-rk1,r32-r43 // Clobber: r16-r31,rk0-rk1,r32-r43
.align 32 .align 32
_ia64_AES_decrypt: _ia64_AES_decrypt:
.prologue
.altrp b6
.body
{ .mmi; alloc r16=ar.pfs,12,0,0,8 { .mmi; alloc r16=ar.pfs,12,0,0,8
LDKEY t0=[rk0],2*KSZ LDKEY t0=[rk0],2*KSZ
mov pr.rot=1<<16 } mov pr.rot=1<<16 }
@ -471,20 +480,21 @@ _ia64_AES_decrypt:
.skip 16 .skip 16
AES_decrypt: AES_decrypt:
.prologue .prologue
.save ar.pfs,r2 .save ar.pfs,pfssave
{ .mmi; alloc r2=ar.pfs,3,0,12,0 { .mmi; alloc pfssave=ar.pfs,3,0,12,0
addl out8=@ltoff(AES_Td#),gp and out0=3,in0
.save ar.lc,r3 mov r3=ip }
mov r3=ar.lc } { .mmi; ADDP in0=0,in0
{ .mmi; and out0=3,in0 ADDP out11=KSZ*60,in2 // &AES_KEY->rounds
ADDP in0=0,in0 .save ar.lc,lcsave
ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds mov lcsave=ar.lc };;
.body { .mmi; ld4 out11=[out11] // AES_KEY->rounds
{ .mmi; ld8 out8=[out8] // Te0 add out8=(AES_Td#-AES_decrypt#),r3 // Td0
ld4 out11=[out11] // AES_KEY->rounds .save pr,prsave
mov prsave=pr } mov prsave=pr }
.body
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... #if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
{ .mib; cmp.ne p6,p0=out0,r0 { .mib; cmp.ne p6,p0=out0,r0
add out0=4,in0 add out0=4,in0
@ -507,8 +517,8 @@ AES_decrypt:
ADDP in1=0,in1 ADDP in1=0,in1
(p6) br.spnt .Ld_o_unaligned };; (p6) br.spnt .Ld_o_unaligned };;
{ .mii; mov ar.pfs=r2 { .mii; mov ar.pfs=pfssave
mov ar.lc=r3 } mov ar.lc=lcsave }
{ .mmi; st4 [in1]=r16,8 // s0 { .mmi; st4 [in1]=r16,8 // s0
st4 [in0]=r20,8 // s1 st4 [in0]=r20,8 // s1
mov pr=prsave,0x1ffff };; mov pr=prsave,0x1ffff };;
@ -591,10 +601,10 @@ AES_decrypt:
mov pr=prsave,0x1ffff }//;; mov pr=prsave,0x1ffff }//;;
{ .mmi; st1 [out1]=r26,4 { .mmi; st1 [out1]=r26,4
st1 [out0]=r27,4 st1 [out0]=r27,4
mov ar.pfs=r2 };; mov ar.pfs=pfssave };;
{ .mmi; st1 [out3]=r28 { .mmi; st1 [out3]=r28
st1 [out2]=r29 st1 [out2]=r29
mov ar.lc=r3 }//;; mov ar.lc=lcsave }//;;
{ .mmb; st1 [out1]=r30 { .mmb; st1 [out1]=r30
st1 [out0]=r31 st1 [out0]=r31
br.ret.sptk.many b0 };; br.ret.sptk.many b0 };;

View File

@ -110,6 +110,8 @@ $code=<<___;
.explicit .explicit
.text .text
pfssave=r2;
lcsave=r3;
prsave=r14; prsave=r14;
K=r15; K=r15;
A=r16; B=r17; C=r18; D=r19; A=r16; B=r17; C=r18; D=r19;
@ -128,20 +130,17 @@ sgm0=r50; sgm1=r51; // small constants
.align 32 .align 32
$func: $func:
.prologue .prologue
.save ar.pfs,r2 .save ar.pfs,pfssave
{ .mmi; alloc r2=ar.pfs,3,17,0,16 { .mmi; alloc pfssave=ar.pfs,3,17,0,16
$ADDP ctx=0,r32 // 1st arg $ADDP ctx=0,r32 // 1st arg
.save ar.lc,r3 .save ar.lc,lcsave
mov r3=ar.lc } mov lcsave=ar.lc }
{ .mmi; $ADDP input=0,r33 // 2nd arg { .mmi; $ADDP input=0,r33 // 2nd arg
addl Ktbl=\@ltoff($TABLE#),gp mov num=r34 // 3rd arg
.save pr,prsave .save pr,prsave
mov prsave=pr };; mov prsave=pr };;
.body .body
{ .mii; ld8 Ktbl=[Ktbl]
mov num=r34 };; // 3rd arg
{ .mib; add r8=0*$SZ,ctx { .mib; add r8=0*$SZ,ctx
add r9=1*$SZ,ctx add r9=1*$SZ,ctx
brp.loop.imp .L_first16,.L_first16_ctop brp.loop.imp .L_first16,.L_first16_ctop
@ -151,20 +150,23 @@ $func:
brp.loop.imp .L_rest,.L_rest_ctop brp.loop.imp .L_rest,.L_rest_ctop
};; };;
// load A-H // load A-H
.Lpic_point:
{ .mmi; $LDW A=[r8],4*$SZ { .mmi; $LDW A=[r8],4*$SZ
$LDW B=[r9],4*$SZ $LDW B=[r9],4*$SZ
mov sgm0=$sigma0[2] } mov Ktbl=ip }
{ .mmi; $LDW C=[r10],4*$SZ { .mmi; $LDW C=[r10],4*$SZ
$LDW D=[r11],4*$SZ $LDW D=[r11],4*$SZ
mov sgm1=$sigma1[2] };; mov sgm0=$sigma0[2] };;
{ .mmi; $LDW E=[r8] { .mmi; $LDW E=[r8]
$LDW F=[r9] } $LDW F=[r9]
add Ktbl=($TABLE#-.Lpic_point),Ktbl }
{ .mmi; $LDW G=[r10] { .mmi; $LDW G=[r10]
$LDW H=[r11] $LDW H=[r11]
cmp.ne p15,p14=0,r35 };; // used in sha256_block cmp.ne p15,p14=0,r35 };; // used in sha256_block
.L_outer: .L_outer:
{ .mii; mov ar.lc=15 { .mii; mov sgm1=$sigma1[2]
mov ar.lc=15
mov ar.ec=1 };; mov ar.ec=1 };;
.align 32 .align 32
.L_first16: .L_first16:
@ -329,7 +331,7 @@ $code.=<<___;
(p6) add Ktbl=-$SZ*$rounds,Ktbl } (p6) add Ktbl=-$SZ*$rounds,Ktbl }
{ .mmi; $LDW r38=[r10],-4*$SZ { .mmi; $LDW r38=[r10],-4*$SZ
$LDW r39=[r11],-4*$SZ $LDW r39=[r11],-4*$SZ
(p7) mov ar.lc=r3 };; (p7) mov ar.lc=lcsave };;
{ .mmi; add A=A,r32 { .mmi; add A=A,r32
add B=B,r33 add B=B,r33
add C=C,r34 } add C=C,r34 }