Pedantic polish to aes-ia64 and sha512-ia64.
This commit is contained in:
parent
165a28abae
commit
0066590f98
@ -24,7 +24,9 @@
|
||||
|
||||
rk0=r8; rk1=r9;
|
||||
|
||||
prsave=r10;
|
||||
pfssave=r2;
|
||||
lcsave=r10;
|
||||
prsave=r3;
|
||||
maskff=r11;
|
||||
twenty4=r14;
|
||||
sixteen=r15;
|
||||
@ -67,6 +69,9 @@ te0=r40; te1=r41; te2=r42; te3=r43;
|
||||
// Clobber: r16-r31,rk0-rk1,r32-r43
|
||||
.align 32
|
||||
_ia64_AES_encrypt:
|
||||
.prologue
|
||||
.altrp b6
|
||||
.body
|
||||
{ .mmi; alloc r16=ar.pfs,12,0,0,8
|
||||
LDKEY t0=[rk0],2*KSZ
|
||||
mov pr.rot=1<<16 }
|
||||
@ -179,20 +184,21 @@ _ia64_AES_encrypt:
|
||||
.skip 16
|
||||
AES_encrypt:
|
||||
.prologue
|
||||
.save ar.pfs,r2
|
||||
{ .mmi; alloc r2=ar.pfs,3,0,12,0
|
||||
addl out8=@ltoff(AES_Te#),gp
|
||||
.save ar.lc,r3
|
||||
mov r3=ar.lc }
|
||||
{ .mmi; and out0=3,in0
|
||||
ADDP in0=0,in0
|
||||
ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
|
||||
.save ar.pfs,pfssave
|
||||
{ .mmi; alloc pfssave=ar.pfs,3,0,12,0
|
||||
and out0=3,in0
|
||||
mov r3=ip }
|
||||
{ .mmi; ADDP in0=0,in0
|
||||
ADDP out11=KSZ*60,in2 // &AES_KEY->rounds
|
||||
.save ar.lc,lcsave
|
||||
mov lcsave=ar.lc };;
|
||||
|
||||
.body
|
||||
{ .mmi; ld8 out8=[out8] // Te0
|
||||
ld4 out11=[out11] // AES_KEY->rounds
|
||||
{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
|
||||
add out8=(AES_Te#-AES_encrypt#),r3 // Te0
|
||||
.save pr,prsave
|
||||
mov prsave=pr }
|
||||
|
||||
.body
|
||||
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
|
||||
{ .mib; cmp.ne p6,p0=out0,r0
|
||||
add out0=4,in0
|
||||
@ -215,8 +221,8 @@ AES_encrypt:
|
||||
ADDP in1=0,in1
|
||||
(p6) br.spnt .Le_o_unaligned };;
|
||||
|
||||
{ .mii; mov ar.pfs=r2
|
||||
mov ar.lc=r3 }
|
||||
// Fall-through when the (p6) branch to .Le_o_unaligned is not taken:
// restore ar.pfs and ar.lc from the pfssave/lcsave registers that the
// AES_encrypt prologue saved them into. (Was misspelled "psfsave", which
// is not a defined register alias; AES_decrypt uses pfssave here.)
{ .mii; mov ar.pfs=pfssave
|
||||
mov ar.lc=lcsave }
|
||||
{ .mmi; st4 [in1]=r16,8 // s0
|
||||
st4 [in0]=r20,8 // s1
|
||||
mov pr=prsave,0x1ffff };;
|
||||
@ -299,10 +305,10 @@ AES_encrypt:
|
||||
mov pr=prsave,0x1ffff }//;;
|
||||
{ .mmi; st1 [out1]=r26,4
|
||||
st1 [out0]=r27,4
|
||||
mov ar.pfs=r2 };;
|
||||
mov ar.pfs=pfssave };;
|
||||
{ .mmi; st1 [out3]=r28
|
||||
st1 [out2]=r29
|
||||
mov ar.lc=r3 }//;;
|
||||
mov ar.lc=lcsave }//;;
|
||||
{ .mmb; st1 [out1]=r30
|
||||
st1 [out0]=r31
|
||||
br.ret.sptk.many b0 };;
|
||||
@ -359,6 +365,9 @@ while(<>) {
|
||||
// Clobber: r16-r31,rk0-rk1,r32-r43
|
||||
.align 32
|
||||
_ia64_AES_decrypt:
|
||||
.prologue
|
||||
.altrp b6
|
||||
.body
|
||||
{ .mmi; alloc r16=ar.pfs,12,0,0,8
|
||||
LDKEY t0=[rk0],2*KSZ
|
||||
mov pr.rot=1<<16 }
|
||||
@ -471,20 +480,21 @@ _ia64_AES_decrypt:
|
||||
.skip 16
|
||||
AES_decrypt:
|
||||
.prologue
|
||||
.save ar.pfs,r2
|
||||
{ .mmi; alloc r2=ar.pfs,3,0,12,0
|
||||
addl out8=@ltoff(AES_Td#),gp
|
||||
.save ar.lc,r3
|
||||
mov r3=ar.lc }
|
||||
{ .mmi; and out0=3,in0
|
||||
ADDP in0=0,in0
|
||||
ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
|
||||
.save ar.pfs,pfssave
|
||||
{ .mmi; alloc pfssave=ar.pfs,3,0,12,0
|
||||
and out0=3,in0
|
||||
mov r3=ip }
|
||||
{ .mmi; ADDP in0=0,in0
|
||||
ADDP out11=KSZ*60,in2 // &AES_KEY->rounds
|
||||
.save ar.lc,lcsave
|
||||
mov lcsave=ar.lc };;
|
||||
|
||||
.body
|
||||
{ .mmi; ld8 out8=[out8] // Te0
|
||||
ld4 out11=[out11] // AES_KEY->rounds
|
||||
{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
|
||||
add out8=(AES_Td#-AES_decrypt#),r3 // Td0
|
||||
.save pr,prsave
|
||||
mov prsave=pr }
|
||||
|
||||
.body
|
||||
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
|
||||
{ .mib; cmp.ne p6,p0=out0,r0
|
||||
add out0=4,in0
|
||||
@ -507,8 +517,8 @@ AES_decrypt:
|
||||
ADDP in1=0,in1
|
||||
(p6) br.spnt .Ld_o_unaligned };;
|
||||
|
||||
{ .mii; mov ar.pfs=r2
|
||||
mov ar.lc=r3 }
|
||||
{ .mii; mov ar.pfs=pfssave
|
||||
mov ar.lc=lcsave }
|
||||
{ .mmi; st4 [in1]=r16,8 // s0
|
||||
st4 [in0]=r20,8 // s1
|
||||
mov pr=prsave,0x1ffff };;
|
||||
@ -591,10 +601,10 @@ AES_decrypt:
|
||||
mov pr=prsave,0x1ffff }//;;
|
||||
{ .mmi; st1 [out1]=r26,4
|
||||
st1 [out0]=r27,4
|
||||
mov ar.pfs=r2 };;
|
||||
mov ar.pfs=pfssave };;
|
||||
{ .mmi; st1 [out3]=r28
|
||||
st1 [out2]=r29
|
||||
mov ar.lc=r3 }//;;
|
||||
mov ar.lc=lcsave }//;;
|
||||
{ .mmb; st1 [out1]=r30
|
||||
st1 [out0]=r31
|
||||
br.ret.sptk.many b0 };;
|
||||
|
@ -110,6 +110,8 @@ $code=<<___;
|
||||
.explicit
|
||||
.text
|
||||
|
||||
pfssave=r2;
|
||||
lcsave=r3;
|
||||
prsave=r14;
|
||||
K=r15;
|
||||
A=r16; B=r17; C=r18; D=r19;
|
||||
@ -128,20 +130,17 @@ sgm0=r50; sgm1=r51; // small constants
|
||||
.align 32
|
||||
$func:
|
||||
.prologue
|
||||
.save ar.pfs,r2
|
||||
{ .mmi; alloc r2=ar.pfs,3,17,0,16
|
||||
.save ar.pfs,pfssave
|
||||
{ .mmi; alloc pfssave=ar.pfs,3,17,0,16
|
||||
$ADDP ctx=0,r32 // 1st arg
|
||||
.save ar.lc,r3
|
||||
mov r3=ar.lc }
|
||||
.save ar.lc,lcsave
|
||||
mov lcsave=ar.lc }
|
||||
{ .mmi; $ADDP input=0,r33 // 2nd arg
|
||||
addl Ktbl=\@ltoff($TABLE#),gp
|
||||
mov num=r34 // 3rd arg
|
||||
.save pr,prsave
|
||||
mov prsave=pr };;
|
||||
|
||||
.body
|
||||
{ .mii; ld8 Ktbl=[Ktbl]
|
||||
mov num=r34 };; // 3rd arg
|
||||
|
||||
{ .mib; add r8=0*$SZ,ctx
|
||||
add r9=1*$SZ,ctx
|
||||
brp.loop.imp .L_first16,.L_first16_ctop
|
||||
@ -151,20 +150,23 @@ $func:
|
||||
brp.loop.imp .L_rest,.L_rest_ctop
|
||||
};;
|
||||
// load A-H
|
||||
.Lpic_point:
|
||||
{ .mmi; $LDW A=[r8],4*$SZ
|
||||
$LDW B=[r9],4*$SZ
|
||||
mov sgm0=$sigma0[2] }
|
||||
mov Ktbl=ip }
|
||||
{ .mmi; $LDW C=[r10],4*$SZ
|
||||
$LDW D=[r11],4*$SZ
|
||||
mov sgm1=$sigma1[2] };;
|
||||
mov sgm0=$sigma0[2] };;
|
||||
{ .mmi; $LDW E=[r8]
|
||||
$LDW F=[r9] }
|
||||
$LDW F=[r9]
|
||||
add Ktbl=($TABLE#-.Lpic_point),Ktbl }
|
||||
{ .mmi; $LDW G=[r10]
|
||||
$LDW H=[r11]
|
||||
cmp.ne p15,p14=0,r35 };; // used in sha256_block
|
||||
|
||||
.L_outer:
|
||||
{ .mii; mov ar.lc=15
|
||||
{ .mii; mov sgm1=$sigma1[2]
|
||||
mov ar.lc=15
|
||||
mov ar.ec=1 };;
|
||||
.align 32
|
||||
.L_first16:
|
||||
@ -329,7 +331,7 @@ $code.=<<___;
|
||||
(p6) add Ktbl=-$SZ*$rounds,Ktbl }
|
||||
{ .mmi; $LDW r38=[r10],-4*$SZ
|
||||
$LDW r39=[r11],-4*$SZ
|
||||
(p7) mov ar.lc=r3 };;
|
||||
(p7) mov ar.lc=lcsave };;
|
||||
{ .mmi; add A=A,r32
|
||||
add B=B,r33
|
||||
add C=C,r34 }
|
||||
|
Loading…
Reference in New Issue
Block a user